From d14d0c1aea8fe0eb61b7788a3b546475e183b079 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:56:08 +0200 Subject: scripts: sphinx-pre-install: improve distro detection check The Arch-linux detection is hit by catting /etc/issue, whose contents is (nowadays): Arch Linux \r (\l) It sounds a little ackward to print such string, so, instead, let's use the /etc/os-release file, with exists on lots of distributions and should provide a more reliable result. We'll keep the old tests before it, in order to avoid possible regressions with the other distros, although the new way should probably work on all the currently supported distributions. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/472924557afdf2b5492ae2a48c5ecfae216d54e2.1586883286.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index fa3fb05cd54b..ea941c6e0647 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -780,6 +780,24 @@ $system_release = catcheck("/etc/system-release") if !$system_release; $system_release = catcheck("/etc/redhat-release") if !$system_release; $system_release = catcheck("/etc/lsb-release") if !$system_release; $system_release = catcheck("/etc/gentoo-release") if !$system_release; + +# This seems more common than LSB these days +if (!$system_release) { + my %os_var; + if (open IN, "cat /etc/os-release|") { + while () { + if (m/^([\w\d\_]+)=\"?([^\"]*)\"?\n/) { + $os_var{$1}=$2; + } + } + $system_release = $os_var{"NAME"}; + if (defined($os_var{"VERSION_ID"})) { + $system_release .= " " . $os_var{"VERSION_ID"} if (defined($os_var{"VERSION_ID"})); + } else { + $system_release .= " " . 
$os_var{"VERSION"}; + } + } +} $system_release = catcheck("/etc/issue") if !$system_release; $system_release =~ s/\s+$//; -- cgit v1.2.3 From b3df6223bdea763ed666b06a4e7431f40b33da67 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:56:09 +0200 Subject: scripts: sphinx-pre-install: improve openSuse Tumbleweed check Currently, with openSUSE Tumbleweed 20200303, it keeps recommending this forever: sudo zypper install --no-recommends rsvg-view This dependency will never be fulfilled there, as the package now is named as on other distros: rsvg-convert. So, improve the detection to avoid such issue. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c3774f72ac36c5e5b5f446ae5db5b795d1f274f4.1586883286.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index ea941c6e0647..6d6d2849fbb1 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -446,9 +446,11 @@ sub give_opensuse_hints() "convert" => "ImageMagick", "Pod::Usage" => "perl-Pod-Usage", "xelatex" => "texlive-xetex-bin", - "rsvg-convert" => "rsvg-view", ); + # On Tumbleweed, this package is also named rsvg-convert + $map{"rsvg-convert"} = "rsvg-view" if (!($system_release =~ /Tumbleweed/)); + my @suse_tex_pkgs = ( "texlive-babel-english", "texlive-caption", -- cgit v1.2.3 From bfc7f4281066154e6b420a87335d6387c9b0835a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:56:10 +0200 Subject: scripts: sphinx-pre-install: fix a dependency hint with Ubuntu 16.04 Avoid the scripts to keep asking to install fonts-noto-cjk on Ubuntu 16.04. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/912b664a8ca54e8c5c5767c3fe9171973eeddd6b.1586883286.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 6d6d2849fbb1..ba750f7b9127 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -349,7 +349,8 @@ sub give_debian_hints() "fonts-dejavu", 2); check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc"], + "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc"], "fonts-noto-cjk", 2); } -- cgit v1.2.3 From e45a631742fadd7c9feb5a0049382102e5d43fe7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:56:11 +0200 Subject: scripts: sphinx-pre-install: address some issues with Gentoo There are some small misdetections with Gentoo. While they don't cause too much trouble, it keeps recomending to install things that are already there. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7f631edce102b02ccbdbfb18be1376a86b41373d.1586883286.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index ba750f7b9127..86e14b9fd537 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -560,7 +560,8 @@ sub give_gentoo_hints() "media-fonts/dejavu", 2) if ($pdf); if ($pdf) { - check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf"], + check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf", + "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc"], "media-fonts/noto-cjk", 2); } @@ -575,10 +576,10 @@ sub give_gentoo_hints() my $portage_imagemagick = "/etc/portage/package.use/imagemagick"; my $portage_cairo = "/etc/portage/package.use/graphviz"; - if (qx(cat $portage_imagemagick) ne "$imagemagick\n") { + if (qx(grep imagemagick $portage_imagemagick 2>/dev/null) eq "") { printf("\tsudo su -c 'echo \"$imagemagick\" > $portage_imagemagick'\n") } - if (qx(cat $portage_cairo) ne "$cairo\n") { + if (qx(grep graphviz $portage_cairo 2>/dev/null) eq "") { printf("\tsudo su -c 'echo \"$cairo\" > $portage_cairo'\n"); } -- cgit v1.2.3 From d6ebf1890c8ba6c9340681cb24bdbdb0ea7c71b4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:56:12 +0200 Subject: scripts: sphinx-pre-install: add support for OpenMandriva It seems that Mageia and OpenMandriva will reunite on a single distribution. In any case, both came from Mandriva. So, it is close enough to use the same logic. So, add support for it. Tested with OpenMandriva 4.1 and with Mageia 7.1. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/692809729c6818a0b0f75513da15970c53d5565c.1586883286.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 86e14b9fd537..89b033285caf 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -494,7 +494,7 @@ sub give_mageia_hints() "convert" => "ImageMagick", "Pod::Usage" => "perl-Pod-Usage", "xelatex" => "texlive", - "rsvg-convert" => "librsvg2-tools", + "rsvg-convert" => "librsvg2", ); my @tex_pkgs = ( @@ -503,16 +503,29 @@ sub give_mageia_hints() $map{"latexmk"} = "texlive-collection-basic"; + my $packager_cmd; + my $noto_sans; + if ($system_release =~ /OpenMandriva/) { + $packager_cmd = "dnf install"; + $noto_sans = "noto-sans-cjk-fonts"; + @tex_pkgs = ( "texlive-collection-fontsextra" ); + } else { + $packager_cmd = "urpmi"; + $noto_sans = "google-noto-sans-cjk-ttc-fonts"; + } + + if ($pdf) { - check_missing_file(["/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc"], - "google-noto-sans-cjk-ttc-fonts", 2); + check_missing_file(["/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", + "/usr/share/fonts/TTF/NotoSans-Regular.ttf"], + $noto_sans, 2); } check_rpm_missing(\@tex_pkgs, 2) if ($pdf); check_missing(\%map); return if (!$need && !$optional); - printf("You should run:\n\n\tsudo urpmi $install\n"); + printf("You should run:\n\n\tsudo $packager_cmd $install\n"); } sub give_arch_linux_hints() @@ -626,6 +639,10 @@ sub check_distros() give_mageia_hints; return; } + if ($system_release =~ /OpenMandriva/) { + give_mageia_hints; + return; + } if ($system_release =~ /Arch Linux/) { give_arch_linux_hints; return; -- cgit v1.2.3 From 2f9c502552cd3e791db91ee80f7766e122b3b026 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:56:13 +0200 Subject: scripts: 
sphinx-pre-install: add support for python -m venv Since python 3.3, the recommended way to setup a virtual env is via "python -m venv". Set this as a default, if python version is compatible with such feature. While here, add more comments to it, as the script is getting more complex. So, better to add more things, to avoid accidentally breaking it while improving it. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/252cc849c79527ad496247e4c481961478adf41c.1586883286.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 113 ++++++++++++++++++++++++++++++--------------- 1 file changed, 75 insertions(+), 38 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 89b033285caf..d4dfe1e59989 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later use strict; -# Copyright (c) 2017-2019 Mauro Carvalho Chehab +# Copyright (c) 2017-2020 Mauro Carvalho Chehab # my $prefix = "./"; @@ -22,9 +22,12 @@ my $need = 0; my $optional = 0; my $need_symlink = 0; my $need_sphinx = 0; +my $need_venv = 0; +my $need_virtualenv = 0; my $rec_sphinx_upgrade = 0; my $install = ""; my $virtenv_dir = ""; +my $python_cmd = ""; my $min_version; # @@ -147,7 +150,7 @@ sub check_program($$) my $prog = shift; my $is_optional = shift; - return if findprog($prog); + return $prog if findprog($prog); add_package($prog, $is_optional); } @@ -168,9 +171,9 @@ sub check_python_module($$) my $prog = shift; my $is_optional = shift; - my $err = system("python3 -c 'import $prog' 2>/dev/null /dev/null"); - return if ($err == 0); - my $err = system("python -c 'import $prog' 2>/dev/null /dev/null"); + return if (!$python_cmd); + + my $err = system("$python_cmd -c 'import $prog' 2>/dev/null /dev/null"); return if ($err == 0); add_package($prog, $is_optional); @@ -225,16 +228,6 @@ sub get_sphinx_fname() return $fname; } - if ($virtualenv) { - my $prog 
= findprog("virtualenv-3"); - $prog = findprog("virtualenv-3.5") if (!$prog); - - check_program("virtualenv", 0) if (!$prog); - $need_sphinx = 1; - } else { - add_package("python-sphinx", 0); - } - return ""; } @@ -268,7 +261,10 @@ sub check_sphinx() $virtenv_dir = $virtenv_prefix . $rec_version; my $sphinx = get_sphinx_fname(); - return if ($sphinx eq ""); + if ($sphinx eq "") { + $need_sphinx = 1; + return; + } open IN, "$sphinx --version 2>&1 |" or die "$sphinx returned an error"; while () { @@ -336,6 +332,7 @@ sub give_debian_hints() my %map = ( "python-sphinx" => "python3-sphinx", "sphinx_rtd_theme" => "python3-sphinx-rtd-theme", + "ensurepip" => "python3-venv", "virtualenv" => "virtualenv", "dot" => "graphviz", "convert" => "imagemagick", @@ -672,13 +669,13 @@ sub check_distros() sub deactivate_help() { - printf "\tIf you want to exit the virtualenv, you can use:\n"; + printf "\nIf you want to exit the virtualenv, you can use:\n"; printf "\tdeactivate\n"; } sub check_needs() { - # Check for needed programs/tools + # Check if Sphinx is already accessible from current environment check_sphinx(); if ($system_release) { @@ -689,6 +686,43 @@ sub check_needs() print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade); + # Check python command line, trying first python3 + $python_cmd = findprog("python3"); + $python_cmd = check_program("python", 0) if (!$python_cmd); + + # Check the type of virtual env, depending on Python version + if ($python_cmd) { + if ($virtualenv) { + my $tmp = qx($python_cmd --version 2>&1); + if ($tmp =~ m/(\d+\.)(\d+\.)/) { + if ($1 >= 3 && $2 >= 3) { + $need_venv = 1; # python 3.3 or upper + } else { + $need_virtualenv = 1; + } + if ($1 < 3) { + # Complain if it finds python2 (or worse) + printf "Warning: python$1 support is deprecated. 
Use it with caution!\n"; + } + } else { + die "Warning: couldn't identify $python_cmd version!"; + } + } else { + add_package("python-sphinx", 0); + } + } + + # Set virtualenv command line, if python < 3.3 + my $virtualenv_cmd; + if ($need_virtualenv) { + $virtualenv_cmd = findprog("virtualenv-3"); + $virtualenv_cmd = findprog("virtualenv-3.5") if (!$virtualenv_cmd); + if (!$virtualenv_cmd) { + check_program("virtualenv", 0); + $virtualenv_cmd = "virtualenv"; + } + } + # Check for needed programs/tools check_perl_module("Pod::Usage", 0); check_program("make", 0); @@ -702,12 +736,30 @@ sub check_needs() check_program("rsvg-convert", 2) if ($pdf); check_program("latexmk", 2) if ($pdf); + if ($need_sphinx || $rec_sphinx_upgrade) { + check_python_module("ensurepip", 0) if ($need_venv); + } + + # Do distro-specific checks and output distro-install commands check_distros(); + if (!$python_cmd) { + if ($need == 1) { + die "Can't build as $need mandatory dependency is missing"; + } elsif ($need) { + die "Can't build as $need mandatory dependencies are missing"; + } + } + + # Check if sphinx-build is called sphinx-build-3 if ($need_symlink) { printf "\tsudo ln -sf %s /usr/bin/sphinx-build\n\n", which("sphinx-build-3"); } + + # NOTE: if the system has a too old Sphinx version installed, + # it will recommend installing a newer version using virtualenv + if ($need_sphinx || $rec_sphinx_upgrade) { my $min_activate = "$ENV{'PWD'}/${virtenv_prefix}${min_version}/bin/activate"; my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate"; @@ -721,27 +773,12 @@ sub check_needs() exit (1); } else { my $rec_activate = "$virtenv_dir/bin/activate"; - my $virtualenv = findprog("virtualenv-3"); - my $rec_python3 = ""; - $virtualenv = findprog("virtualenv-3.5") if (!$virtualenv); - $virtualenv = findprog("virtualenv") if (!$virtualenv); - $virtualenv = "virtualenv" if (!$virtualenv); - - my $rel = ""; - if (index($system_release, "Ubuntu") != -1) { - $rel = $1 if ($system_release 
=~ /Ubuntu\s+(\d+)[.]/); - if ($rel && $rel >= 16) { - $rec_python3 = " -p python3"; - } - } - if (index($system_release, "Debian") != -1) { - $rel = $1 if ($system_release =~ /Debian\s+(\d+)/); - if ($rel && $rel >= 7) { - $rec_python3 = " -p python3"; - } - } - printf "\t$virtualenv$rec_python3 $virtenv_dir\n"; + if ($need_venv) { + printf "\t$python_cmd -m venv $virtenv_dir\n"; + } else { + printf "\t$virtualenv_cmd $virtenv_dir\n"; + } printf "\t. $rec_activate\n"; printf "\tpip install -r $requirement_file\n"; deactivate_help(); -- cgit v1.2.3 From 346282db9c6bc7ce2dad8ae640494f3f88d23889 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:27 +0200 Subject: scripts: kernel-doc: proper handle @foo->bar() The pattern @foo->bar() is valid, as it can be used by a function pointer inside a struct passed as a parameter. Right now, it causes a warning: ./drivers/firewire/core-transaction.c:606: WARNING: Inline strong start-string without end-string. In this specific case, the kernel-doc markup is: /** * fw_core_remove_address_handler() - unregister an address handler * @handler: callback * * To be called in process context. * * When fw_core_remove_address_handler() returns, @handler->callback() is * guaranteed to not run on any CPU anymore. */ With seems valid on my eyes. So, instead of trying to hack the kernel-doc markup, let's teach it about how to handle such things. This should likely remove lots of other similar warnings as well. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/48b46426d7bf6ff7529f20e5718fbf4e9758e62c.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index f746ca8fa403..a9f6d4cea866 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -214,6 +214,7 @@ my $type_constant2 = '\%([-_\w]+)'; my $type_func = '(\w+)\(\)'; my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params +my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params my $type_env = '(\$\w+)'; my $type_enum = '\&(enum\s*([_\w]+))'; my $type_struct = '\&(struct\s*([_\w]+))'; @@ -249,6 +250,7 @@ my @highlights_rst = ( [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], [$type_fp_param, "**\$1\\\\(\\\\)**"], + [$type_fp_param2, "**\$1\\\\(\\\\)**"], [$type_func, "\$1()"], [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], -- cgit v1.2.3 From ee2aa7590398ecc0877552191e38d8a282f21cb8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:28 +0200 Subject: scripts: kernel-doc: accept negation like !@var On a few places, it sometimes need to indicate a negation of a parameter, like: !@fshared This pattern happens, for example, at: kernel/futex.c and it is perfectly valid. However, kernel-doc currently transforms it into: !**fshared** This won't do what it would be expected. Fortunately, fixing the script is a simple matter of storing the "!" 
before "@" and adding it after the bold markup, like: **!fshared** Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/0314b47f8c3e1f9db00d5375a73dc3cddd8a21f2.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index a9f6d4cea866..83b7260e44dc 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -213,6 +213,7 @@ my $type_constant = '\b``([^\`]+)``\b'; my $type_constant2 = '\%([-_\w]+)'; my $type_func = '(\w+)\(\)'; my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; +my $type_param_ref = '([\!]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params my $type_env = '(\$\w+)'; @@ -237,6 +238,7 @@ my @highlights_man = ( [$type_typedef, "\\\\fI\$1\\\\fP"], [$type_union, "\\\\fI\$1\\\\fP"], [$type_param, "\\\\fI\$1\\\\fP"], + [$type_param_ref, "\\\\fI\$1\$2\\\\fP"], [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], [$type_fallback, "\\\\fI\$1\\\\fP"] ); @@ -258,7 +260,7 @@ my @highlights_rst = ( [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], # in rst this can refer to any type [$type_fallback, "\\:c\\:type\\:`\$1`"], - [$type_param, "**\$1**"] + [$type_param_ref, "**\$1\$2**"] ); my $blankline_rst = "\n"; -- cgit v1.2.3 From 0d55d48b19ff3a31b295836da23223a52de7ef06 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:29 +0200 Subject: scripts: kernel-doc: accept blank lines on parameter description Sphinx is very pedantic with respect to blank lines. Sometimes, in order to make it to properly handle something, we need to add a blank line. However, currently, any blank line inside a kernel-doc comment like: /* * @foo: bar * * foobar * * some description will be considered as if "foobar" was part of the description. 
This patch changes kernel-doc behavior. After it, foobar will be considered as part of the parameter text. The description will only be considered as such if it starts with: zero spaces after asterisk: *foo one space after asterisk: * foo or have a explicit Description section: * Description: Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c07d2862792d75a2691d69c9eceb7b89a0164cc0.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/kernel-doc | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 83b7260e44dc..f68d76dd97ba 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -331,13 +331,14 @@ my $lineprefix=""; # Parser states use constant { - STATE_NORMAL => 0, # normal code - STATE_NAME => 1, # looking for function name - STATE_BODY_MAYBE => 2, # body - or maybe more description - STATE_BODY => 3, # the body of the comment - STATE_PROTO => 4, # scanning prototype - STATE_DOCBLOCK => 5, # documentation block - STATE_INLINE => 6, # gathering documentation outside main block + STATE_NORMAL => 0, # normal code + STATE_NAME => 1, # looking for function name + STATE_BODY_MAYBE => 2, # body - or maybe more description + STATE_BODY => 3, # the body of the comment + STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line + STATE_PROTO => 5, # scanning prototype + STATE_DOCBLOCK => 6, # documentation block + STATE_INLINE => 7, # gathering doc outside main block }; my $state; my $in_doc_sect; @@ -1957,6 +1958,12 @@ sub process_body($$) { } } + if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) { + dump_section($file, $section, $contents); + $section = $section_default; + $contents = ""; + } + if (/$doc_sect/i) { # case insensitive for supported section names $newsection = $1; $newcontents = $2; @@ -2010,18 +2017,21 @@ sub process_body($$) { $state = STATE_PROTO; $brcount = 0; } elsif (/$doc_content/) { 
- # miguel-style comment kludge, look for blank lines after - # @parameter line to signify start of description if ($1 eq "") { - if ($section =~ m/^@/ || $section eq $section_context) { + if ($section eq $section_context) { dump_section($file, $section, $contents); $section = $section_default; $contents = ""; $new_start_line = $.; + $state = STATE_BODY; } else { + if ($section ne $section_default) { + $state = STATE_BODY_WITH_BLANK_LINE; + } else { + $state = STATE_BODY; + } $contents .= "\n"; } - $state = STATE_BODY; } elsif ($state == STATE_BODY_MAYBE) { # Continued declaration purpose chomp($declaration_purpose); @@ -2173,7 +2183,8 @@ sub process_file($) { process_normal(); } elsif ($state == STATE_NAME) { process_name($file, $_); - } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE) { + } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE || + $state == STATE_BODY_WITH_BLANK_LINE) { process_body($file, $_); } elsif ($state == STATE_INLINE) { # scanning for inline parameters process_inline($file, $_); -- cgit v1.2.3 From d5afc9640a6d4596e57a2c4906f903ab1c83ada5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:30 +0200 Subject: docs: update recommended Sphinx version to 2.4.4 There are some docs that have nested tables. While this was always part of the spec, only Sphinx version 2.4.x can translate it to LaTeX. In other words, if someone is using a Sphinx version < 2.4, the LaTeX and PDF output won't work for some of the docs. So, it seems that it is time to raise the bar again for the recommented version. The Sphinx check script is already smart enough to keep working, with older versions, warning the users that an upgrade is recommended (and explaining how): Sphinx version 1.7.9 Warning: It is recommended at least Sphinx version 2.4.4. Detected OS: Fedora release 31 (Thirty One). To upgrade Sphinx, use: /usr/bin/virtualenv sphinx_2.4.4 . 
sphinx_2.4.4/bin/activate pip install -r ./Documentation/sphinx/requirements.txt Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/498f701c618f7d0cf5f0a37e5889ee926f7c8bf4.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/sphinx/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 14e29a0ae480..489f6626de67 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,3 +1,3 @@ docutils -Sphinx==1.7.9 +Sphinx==2.4.4 sphinx_rtd_theme -- cgit v1.2.3 From 25813cae1eebbd77240b19ba45a1854020fa2cf6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:31 +0200 Subject: docs: LaTeX/PDF: drop list of documents The building system can auto-generate a list of documents since commit: 9d42afbe6bd4 ("docs: pdf: add all Documentation/*/index.rst to PDF output"). The added logic there allows keeping the existing list, but there's not real reason to keep it. Now, the media document has gone (it was split into tree). So, it sounds about time to get rid of the manual entries, and let the script to generate it automatically instead. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9345dba7164497dbf28578f6ec271e479379610c.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 38 -------------------------------------- 1 file changed, 38 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 9ae8e9abf846..f6a1bc07c410 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -388,44 +388,6 @@ if major == 1 and minor < 6: # author, documentclass [howto, manual, or own class]). 
# Sorted in alphabetical order latex_documents = [ - ('admin-guide/index', 'linux-user.tex', 'Linux Kernel User Documentation', - 'The kernel development community', 'manual'), - ('core-api/index', 'core-api.tex', 'The kernel core API manual', - 'The kernel development community', 'manual'), - ('crypto/index', 'crypto-api.tex', 'Linux Kernel Crypto API manual', - 'The kernel development community', 'manual'), - ('dev-tools/index', 'dev-tools.tex', 'Development tools for the Kernel', - 'The kernel development community', 'manual'), - ('doc-guide/index', 'kernel-doc-guide.tex', 'Linux Kernel Documentation Guide', - 'The kernel development community', 'manual'), - ('driver-api/index', 'driver-api.tex', 'The kernel driver API manual', - 'The kernel development community', 'manual'), - ('filesystems/index', 'filesystems.tex', 'Linux Filesystems API', - 'The kernel development community', 'manual'), - ('admin-guide/ext4', 'ext4-admin-guide.tex', 'ext4 Administration Guide', - 'ext4 Community', 'manual'), - ('filesystems/ext4/index', 'ext4-data-structures.tex', - 'ext4 Data Structures and Algorithms', 'ext4 Community', 'manual'), - ('gpu/index', 'gpu.tex', 'Linux GPU Driver Developer\'s Guide', - 'The kernel development community', 'manual'), - ('input/index', 'linux-input.tex', 'The Linux input driver subsystem', - 'The kernel development community', 'manual'), - ('kernel-hacking/index', 'kernel-hacking.tex', 'Unreliable Guide To Hacking The Linux Kernel', - 'The kernel development community', 'manual'), - ('media/index', 'media.tex', 'Linux Media Subsystem Documentation', - 'The kernel development community', 'manual'), - ('networking/index', 'networking.tex', 'Linux Networking Documentation', - 'The kernel development community', 'manual'), - ('process/index', 'development-process.tex', 'Linux Kernel Development Documentation', - 'The kernel development community', 'manual'), - ('security/index', 'security.tex', 'The kernel security subsystem manual', - 'The kernel 
development community', 'manual'), - ('sh/index', 'sh.tex', 'SuperH architecture implementation manual', - 'The kernel development community', 'manual'), - ('sound/index', 'sound.tex', 'Linux Sound Subsystem Documentation', - 'The kernel development community', 'manual'), - ('userspace-api/index', 'userspace-api.tex', 'The Linux kernel user-space API guide', - 'The kernel development community', 'manual'), ] # Add all other index files from Documentation/ subdirectories -- cgit v1.2.3 From 101e330fd3f275ea553224bece4cf11ba20c6def Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:32 +0200 Subject: MAINTAINERS: dt: update display/allwinner file entry Changeset f5a98bfe7b37 ("dt-bindings: display: Convert Allwinner display pipeline to schemas") split Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt into several files. Yet, it kept the old place at MAINTAINERS. Update it to point to the new place. Fixes: f5a98bfe7b37 ("dt-bindings: display: Convert Allwinner display pipeline to schemas") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1be758765272ba4c2acbc3904bdf71c863a90186.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e64e5db31497..86f98c3e6cfc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5552,7 +5552,7 @@ M: Chen-Yu Tsai L: dri-devel@lists.freedesktop.org S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc -F: Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt +F: Documentation/devicetree/bindings/display/allwinner* F: drivers/gpu/drm/sun4i/ DRM DRIVERS FOR AMLOGIC SOCS -- cgit v1.2.3 From 0855a36e22483b8e56eea84112eb0d5c48a2fa3a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:33 +0200 Subject: MAINTAINERS: dt: fix pointers for ARM Integrator, Versatile and RealView There's a conversion from a plain text 
binding file into 4 yaml ones. The old file got removed, causing this new warning: Warning: MAINTAINERS references a file that doesn't exist: Documentation/devicetree/bindings/arm/arm-boards Address it by replacing the old reference by the new ones Fixes: 2d483550b6d2 ("dt-bindings: arm: Drop the non-YAML bindings") Fixes: 33fbfb3eaf4e ("dt-bindings: arm: Add Integrator YAML schema") Fixes: 4b900070d50d ("dt-bindings: arm: Add Versatile YAML schema") Fixes: 7db625b9fa75 ("dt-bindings: arm: Add RealView YAML schema") Fixes: 4fb00d9066c1 ("dt-bindings: arm: Add Versatile Express and Juno YAML schema") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/eae3440fb70c1b1666973e34fd3fd6b8ab4a3bc7.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- MAINTAINERS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 86f98c3e6cfc..82e4b0a0c921 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1323,7 +1323,10 @@ ARM INTEGRATOR, VERSATILE AND REALVIEW SUPPORT M: Linus Walleij L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -F: Documentation/devicetree/bindings/arm/arm-boards +F: Documentation/devicetree/bindings/arm/arm,integrator.yaml +F: Documentation/devicetree/bindings/arm/arm,realview.yaml +F: Documentation/devicetree/bindings/arm/arm,versatile.yaml +F: Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml F: Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt F: Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml F: Documentation/devicetree/bindings/i2c/i2c-versatile.txt -- cgit v1.2.3 From f9faa90899a2fa6e830bb785d473ebbedebcf80b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:34 +0200 Subject: docs: dt: fix broken reference to phy-cadence-torrent.yaml This file was removed, and another file was added instead of it, on two separate commits. 
Splitting a single logical change (doc conversion) on two patches is a bad thing, as it makes harder to discover what crap happened. Anyway, this patch fixes the broken reference, making it pointing to the new location of the file. Fixes: 922003733d42 ("dt-bindings: phy: Remove Cadence MHDP PHY dt binding") Fixes: c6d8eef38b7f ("dt-bindings: phy: Add Cadence MHDP PHY bindings in YAML format.") Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3f1cf6d74e392f3ee9f894d82cb7ee29d04c1b6d.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml index fd1982c56104..3f913d6d1c3d 100644 --- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml +++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml @@ -146,7 +146,7 @@ patternProperties: bindings specified in Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt Torrent SERDES should follow the bindings specified in - Documentation/devicetree/bindings/phy/phy-cadence-dp.txt + Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml required: - compatible -- cgit v1.2.3 From 72ef5e52b3f74c0be47b20f5c434b7ecc830cf40 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:35 +0200 Subject: docs: fix broken references to text files Several references got broken due to txt to ReST conversion. Several of them can be automatically fixed with: scripts/documentation-file-ref-check --fix Reviewed-by: Mathieu Poirier # hwtracing/coresight/Kconfig Reviewed-by: Paul E. 
McKenney # memory-barrier.txt Acked-by: Alex Shi # translations/zh_CN Acked-by: Federico Vaga # translations/it_IT Acked-by: Marc Zyngier # kvm/arm64 Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6f919ddb83a33b5f2a63b6b5f0575737bb2b36aa.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/memory-barriers.txt | 2 +- Documentation/process/submit-checklist.rst | 2 +- .../translations/it_IT/process/submit-checklist.rst | 2 +- Documentation/translations/ko_KR/memory-barriers.txt | 2 +- Documentation/translations/zh_CN/filesystems/sysfs.txt | 2 +- .../translations/zh_CN/process/submit-checklist.rst | 2 +- Documentation/virt/kvm/arm/pvtime.rst | 2 +- Documentation/virt/kvm/devices/vcpu.rst | 2 +- Documentation/virt/kvm/hypercalls.rst | 4 ++-- arch/powerpc/include/uapi/asm/kvm_para.h | 2 +- drivers/gpu/drm/Kconfig | 2 +- drivers/gpu/drm/drm_ioctl.c | 2 +- drivers/hwtracing/coresight/Kconfig | 2 +- fs/fat/Kconfig | 8 ++++---- fs/fuse/Kconfig | 2 +- fs/fuse/dev.c | 2 +- fs/overlayfs/Kconfig | 6 +++--- include/linux/mm.h | 4 ++-- include/uapi/linux/ethtool_netlink.h | 2 +- include/uapi/rdma/rdma_user_ioctl_cmds.h | 2 +- mm/gup.c | 12 ++++++------ virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 +- virt/kvm/arm/vgic/vgic.h | 4 ++-- 23 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index e1c355e84edd..eaabc3134294 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -620,7 +620,7 @@ because the CPUs that the Linux kernel supports don't do writes until they are certain (1) that the write will actually happen, (2) of the location of the write, and (3) of the value to be written. 
But please carefully read the "CONTROL DEPENDENCIES" section and the -Documentation/RCU/rcu_dereference.txt file: The compiler can and does +Documentation/RCU/rcu_dereference.rst file: The compiler can and does break dependencies in a great many highly creative ways. CPU 1 CPU 2 diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 8e56337d422d..3f8e9d5d95c2 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -107,7 +107,7 @@ and elsewhere regarding submitting Linux kernel patches. and why. 26) If any ioctl's are added by the patch, then also update - ``Documentation/ioctl/ioctl-number.rst``. + ``Documentation/userspace-api/ioctl/ioctl-number.rst``. 27) If your modified source code depends on or uses any of the kernel APIs or features that are related to the following ``Kconfig`` symbols, diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst index 995ee69fab11..3e575502690f 100644 --- a/Documentation/translations/it_IT/process/submit-checklist.rst +++ b/Documentation/translations/it_IT/process/submit-checklist.rst @@ -117,7 +117,7 @@ sottomissione delle patch, in particolare sorgenti che ne spieghi la logica: cosa fanno e perché. 25) Se la patch aggiunge nuove chiamate ioctl, allora aggiornate - ``Documentation/ioctl/ioctl-number.rst``. + ``Documentation/userspace-api/ioctl/ioctl-number.rst``. 
26) Se il codice che avete modificato dipende o usa una qualsiasi interfaccia o funzionalità del kernel che è associata a uno dei seguenti simboli diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index 2e831ece6e26..e50fe6541335 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -641,7 +641,7 @@ P 는 짝수 번호 캐시 라인에 저장되어 있고, 변수 B 는 홀수 리눅스 커널이 지원하는 CPU 들은 (1) 쓰기가 정말로 일어날지, (2) 쓰기가 어디에 이루어질지, 그리고 (3) 쓰여질 값을 확실히 알기 전까지는 쓰기를 수행하지 않기 때문입니다. 하지만 "컨트롤 의존성" 섹션과 -Documentation/RCU/rcu_dereference.txt 파일을 주의 깊게 읽어 주시기 바랍니다: +Documentation/RCU/rcu_dereference.rst 파일을 주의 깊게 읽어 주시기 바랍니다: 컴파일러는 매우 창의적인 많은 방법으로 종속성을 깰 수 있습니다. CPU 1 CPU 2 diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt index ee1f37da5b23..a15c3ebdfa82 100644 --- a/Documentation/translations/zh_CN/filesystems/sysfs.txt +++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt @@ -281,7 +281,7 @@ drivers/ 包含了每个已为特定总线上的设备而挂载的驱动程序 假定驱动没有跨越多个总线类型)。 fs/ 包含了一个为文件系统设立的目录。现在每个想要导出属性的文件系统必须 -在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse.txt)。 +在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse.rst)。 dev/ 包含两个子目录: char/ 和 block/。在这两个子目录中,有以 : 格式命名的符号链接。这些符号链接指向 sysfs 目录 diff --git a/Documentation/translations/zh_CN/process/submit-checklist.rst b/Documentation/translations/zh_CN/process/submit-checklist.rst index 8738c55e42a2..50386e0e42e7 100644 --- a/Documentation/translations/zh_CN/process/submit-checklist.rst +++ b/Documentation/translations/zh_CN/process/submit-checklist.rst @@ -97,7 +97,7 @@ Linux内核补丁提交清单 24) 所有内存屏障例如 ``barrier()``, ``rmb()``, ``wmb()`` 都需要源代码中的注 释来解释它们正在执行的操作及其原因的逻辑。 -25) 如果补丁添加了任何ioctl,那么也要更新 ``Documentation/ioctl/ioctl-number.rst`` +25) 如果补丁添加了任何ioctl,那么也要更新 ``Documentation/userspace-api/ioctl/ioctl-number.rst`` 26) 如果修改后的源代码依赖或使用与以下 ``Kconfig`` 符号相关的任何内核API或 功能,则在禁用相关 
``Kconfig`` 符号和/或 ``=m`` (如果该选项可用)的情况 diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst index 2357dd2d8655..687b60d76ca9 100644 --- a/Documentation/virt/kvm/arm/pvtime.rst +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -76,5 +76,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of these structures and not used for other purposes, this enables the guest to map the region using 64k pages and avoids conflicting attributes with other memory. -For the user space interface see Documentation/virt/kvm/devices/vcpu.txt +For the user space interface see Documentation/virt/kvm/devices/vcpu.rst section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL". diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 9963e680770a..ca374d3fe085 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -110,5 +110,5 @@ Returns: Specifies the base address of the stolen time structure for this VCPU. The base address must be 64 byte aligned and exist within a valid guest memory -region. See Documentation/virt/kvm/arm/pvtime.txt for more information +region. See Documentation/virt/kvm/arm/pvtime.rst for more information including the layout of the stolen time structure. diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst index dbaf207e560d..ed4fddd364ea 100644 --- a/Documentation/virt/kvm/hypercalls.rst +++ b/Documentation/virt/kvm/hypercalls.rst @@ -22,7 +22,7 @@ S390: number in R1. For further information on the S390 diagnose call as supported by KVM, - refer to Documentation/virt/kvm/s390-diag.txt. + refer to Documentation/virt/kvm/s390-diag.rst. PowerPC: It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers. @@ -30,7 +30,7 @@ PowerPC: KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions' property inside the device tree's /hypervisor node. 
- For more information refer to Documentation/virt/kvm/ppc-pv.txt + For more information refer to Documentation/virt/kvm/ppc-pv.rst MIPS: KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index be48c2215fa2..a809b1b44ddf 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -31,7 +31,7 @@ * Struct fields are always 32 or 64 bit aligned, depending on them being 32 * or 64 bit wide respectively. * - * See Documentation/virt/kvm/ppc-pv.txt + * See Documentation/virt/kvm/ppc-pv.rst */ struct kvm_vcpu_arch_shared { __u64 scratch1; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 43594978958e..fb92be7e8aa7 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -161,7 +161,7 @@ config DRM_LOAD_EDID_FIRMWARE monitor are unable to provide appropriate EDID data. Since this feature is provided as a workaround for broken hardware, the default case is N. Details and instructions how to build your own - EDID data are given in Documentation/driver-api/edid.rst. + EDID data are given in Documentation/admin-guide/edid.rst. config DRM_DP_CEC bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 9e41972c4bbc..c2b8d2a953ae 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -741,7 +741,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = { * }; * * Please make sure that you follow all the best practices from - * ``Documentation/ioctl/botching-up-ioctls.rst``. Note that drm_ioctl() + * ``Documentation/process/botching-up-ioctls.rst``. Note that drm_ioctl() * automatically zero-extends structures, hence make sure you can add more stuff * at the end, i.e. don't put a variable sized array there. 
* diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index 83e841be1081..02dbb5ca3bcf 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -107,7 +107,7 @@ config CORESIGHT_CPU_DEBUG can quickly get to know program counter (PC), secure state, exception level, etc. Before use debugging functionality, platform needs to ensure the clock domain and power domain are enabled - properly, please refer Documentation/trace/coresight-cpu-debug.rst + properly, please refer Documentation/trace/coresight/coresight-cpu-debug.rst for detailed description and the example for usage. config CORESIGHT_CTI diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index 718163d0c621..ca31993dcb47 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig @@ -69,7 +69,7 @@ config VFAT_FS The VFAT support enlarges your kernel by about 10 KB and it only works if you said Y to the "DOS FAT fs support" above. Please read - the file for details. If + the file for details. If unsure, say Y. To compile this as a module, choose M here: the module will be called @@ -82,7 +82,7 @@ config FAT_DEFAULT_CODEPAGE help This option should be set to the codepage of your FAT filesystems. It can be overridden with the "codepage" mount option. - See for more information. + See for more information. config FAT_DEFAULT_IOCHARSET string "Default iocharset for FAT" @@ -96,7 +96,7 @@ config FAT_DEFAULT_IOCHARSET Note that "utf8" is not recommended for FAT filesystems. If unsure, you shouldn't set "utf8" here - select the next option instead if you would like to use UTF-8 encoded file names by default. - See for more information. + See for more information. Enable any character sets you need in File Systems/Native Language Support. @@ -114,4 +114,4 @@ config FAT_DEFAULT_UTF8 Say Y if you use UTF-8 encoding for file names, N otherwise. - See for more information. + See for more information. 
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index eb2a585572dc..774b2618018a 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -12,7 +12,7 @@ config FUSE_FS although chances are your distribution already has that library installed if you've installed the "fuse" package itself. - See for more information. + See for more information. See for needed library/utility version. If you want to develop a userspace FS, or if you want to use diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 97eec7522bf2..c7a65cf2bcca 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2081,7 +2081,7 @@ static void end_polls(struct fuse_conn *fc) * The same effect is usually achievable through killing the filesystem daemon * and all users of the filesystem. The exception is the combination of an * asynchronous request and the tricky deadlock (see - * Documentation/filesystems/fuse.txt). + * Documentation/filesystems/fuse.rst). * * Aborting requests under I/O goes as follows: 1: Separate out unlocked * requests, they should be finished off immediately. Locked requests will be diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 714c14c47ca5..dd188c7996b3 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -9,7 +9,7 @@ config OVERLAY_FS 'lower' filesystem is either hidden or, in the case of directories, merged with the 'upper' object. - For more information see Documentation/filesystems/overlayfs.txt + For more information see Documentation/filesystems/overlayfs.rst config OVERLAY_FS_REDIRECT_DIR bool "Overlayfs: turn on redirect directory feature by default" @@ -38,7 +38,7 @@ config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW If backward compatibility is not an issue, then it is safe and recommended to say N here. - For more information, see Documentation/filesystems/overlayfs.txt + For more information, see Documentation/filesystems/overlayfs.rst If unsure, say Y. 
@@ -103,7 +103,7 @@ config OVERLAY_FS_XINO_AUTO If compatibility with applications that expect 32bit inodes is not an issue, then it is safe and recommended to say Y here. - For more information, see Documentation/filesystems/overlayfs.txt + For more information, see Documentation/filesystems/overlayfs.rst If unsure, say N. diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..1f2850465f59 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1219,7 +1219,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages); * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS * scheme). * - * For more information, please see Documentation/vm/pin_user_pages.rst. + * For more information, please see Documentation/core-api/pin_user_pages.rst. * * @page: pointer to page to be queried. * @Return: True, if it is likely that the page has been "dma-pinned". @@ -2834,7 +2834,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * releasing pages: get_user_pages*() pages must be released via put_page(), * while pin_user_pages*() pages must be released via unpin_user_page(). * - * Please see Documentation/vm/pin_user_pages.rst for more information. + * Please see Documentation/core-api/pin_user_pages.rst for more information. */ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7fde76366ba4..1711e57f7848 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -2,7 +2,7 @@ /* * include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool * - * See Documentation/networking/ethtool-netlink.txt in kernel source tree for + * See Documentation/networking/ethtool-netlink.rst in kernel source tree for * doucumentation of the interface. 
*/ diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h index 7b1ec806f8f9..38ab7accb7be 100644 --- a/include/uapi/rdma/rdma_user_ioctl_cmds.h +++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h @@ -36,7 +36,7 @@ #include #include -/* Documentation/ioctl/ioctl-number.rst */ +/* Documentation/userspace-api/ioctl/ioctl-number.rst */ #define RDMA_IOCTL_MAGIC 0x1b #define RDMA_VERBS_IOCTL \ _IOWR(RDMA_IOCTL_MAGIC, 1, struct ib_uverbs_ioctl_hdr) diff --git a/mm/gup.c b/mm/gup.c index 6076df8e04a4..81e4d0b377fd 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2843,9 +2843,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast); * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please - * see Documentation/vm/pin_user_pages.rst for further details. + * see Documentation/core-api/pin_user_pages.rst for further details. * - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It * is NOT intended for Case 2 (RDMA: long-term pins). */ int pin_user_pages_fast(unsigned long start, int nr_pages, @@ -2883,9 +2883,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast); * the arguments here are identical. * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please - * see Documentation/vm/pin_user_pages.rst for details. + * see Documentation/core-api/pin_user_pages.rst for details. * - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It * is NOT intended for Case 2 (RDMA: long-term pins). */ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, @@ -2919,9 +2919,9 @@ EXPORT_SYMBOL(pin_user_pages_remote); * FOLL_PIN is set. * * FOLL_PIN means that the pages must be released via unpin_user_page(). 
Please - * see Documentation/vm/pin_user_pages.rst for details. + * see Documentation/core-api/pin_user_pages.rst for details. * - * This is intended for Case 1 (DIO) in Documentation/vm/pin_user_pages.rst. It + * This is intended for Case 1 (DIO) in Documentation/core-api/pin_user_pages.rst. It * is NOT intended for Case 2 (RDMA: long-term pins). */ long pin_user_pages(unsigned long start, unsigned long nr_pages, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index e72dcc454247..859464fd413f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -301,7 +301,7 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, * pending state of interrupt is latched in pending_latch variable. * Userspace will save and restore pending state and line_level * separately. - * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst * for handling of ISPENDR and ICPENDR. */ for (i = 0; i < len * 8; i++) { diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 769e4802645e..64fcd7511110 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -42,7 +42,7 @@ VGIC_AFFINITY_LEVEL(val, 3)) /* - * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt, + * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst, * below macros are defined for CPUREG encoding. */ #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 @@ -63,7 +63,7 @@ KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) /* - * As per Documentation/virt/kvm/devices/arm-vgic-its.txt, + * As per Documentation/virt/kvm/devices/arm-vgic-its.rst, * below macros are defined for ITS table entry encoding. 
*/ #define KVM_ITS_CTE_VALID_SHIFT 63 -- cgit v1.2.3 From 3ecad8c2c1ff333e204c26e2f0dddfa623153f87 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:36 +0200 Subject: docs: fix broken references for ReST files that moved around Some broken references happened due to shifting files around and ReST renames. Those can't be auto-fixed by the script, so let's fix them manually. Signed-off-by: Mauro Carvalho Chehab Acked-by: Corentin Labbe Link: https://lore.kernel.org/r/64773a12b4410aaf3e3be89e3ec7e34de2484eea.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/maintainer-profile.rst | 2 +- Documentation/virt/kvm/mmu.rst | 2 +- Documentation/virt/kvm/review-checklist.rst | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 2 +- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 2 +- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 2 +- drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c | 2 +- drivers/media/v4l2-core/v4l2-fwnode.c | 2 +- include/uapi/linux/kvm.h | 4 ++-- tools/include/uapi/linux/kvm.h | 4 ++-- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/doc-guide/maintainer-profile.rst b/Documentation/doc-guide/maintainer-profile.rst index 5afc0ddba40a..755d39f0d407 100644 --- a/Documentation/doc-guide/maintainer-profile.rst +++ b/Documentation/doc-guide/maintainer-profile.rst @@ -6,7 +6,7 @@ Documentation subsystem maintainer entry profile The documentation "subsystem" is the central coordinating point for the kernel's documentation and associated infrastructure. It covers the hierarchy under Documentation/ (with the exception of -Documentation/device-tree), various utilities under scripts/ and, at least +Documentation/devicetree), various utilities under scripts/ and, at least some of the time, LICENSES/. 
It's worth noting, though, that the boundaries of this subsystem are rather diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst index 60981887d20b..46126ecc70f7 100644 --- a/Documentation/virt/kvm/mmu.rst +++ b/Documentation/virt/kvm/mmu.rst @@ -319,7 +319,7 @@ Handling a page fault is performed as follows: - If both P bit and R/W bit of error code are set, this could possibly be handled as a "fast page fault" (fixed without taking the MMU lock). See - the description in Documentation/virt/kvm/locking.txt. + the description in Documentation/virt/kvm/locking.rst. - if needed, walk the guest page tables to determine the guest translation (gva->gpa or ngpa->gpa) diff --git a/Documentation/virt/kvm/review-checklist.rst b/Documentation/virt/kvm/review-checklist.rst index 1f86a9d3f705..dc01aea4057b 100644 --- a/Documentation/virt/kvm/review-checklist.rst +++ b/Documentation/virt/kvm/review-checklist.rst @@ -10,7 +10,7 @@ Review checklist for kvm patches 2. Patches should be against kvm.git master branch. 3. If the patch introduces or modifies a new userspace API: - - the API must be documented in Documentation/virt/kvm/api.txt + - the API must be documented in Documentation/virt/kvm/api.rst - the API must be discoverable using KVM_CHECK_EXTENSION 4. New state must include support for save/restore. diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8071952e9cf2..fd59fee84631 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3586,7 +3586,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, /* * Currently, fast page fault only works for direct mapping * since the gfn is not stable for indirect shadow page. See - * Documentation/virt/kvm/locking.txt to get more detail. + * Documentation/virt/kvm/locking.rst to get more detail. 
*/ fault_handled = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index a5fd8975f3d3..a6abb701bfc6 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -8,7 +8,7 @@ * This file add support for AES cipher with 128,192,256 bits keysize in * CBC and ECB mode. * - * You could find a link for the datasheet in Documentation/arm/sunxi/README + * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ #include diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index 3e4e4bbda34c..b957061424a1 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -7,7 +7,7 @@ * * Core file which registers crypto algorithms supported by the CryptoEngine. * - * You could find a link for the datasheet in Documentation/arm/sunxi/README + * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ #include #include diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 84d52fc3a2da..c89cb2ee2496 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -8,7 +8,7 @@ * This file add support for AES cipher with 128,192,256 bits keysize in * CBC and ECB mode. 
* - * You could find a link for the datasheet in Documentation/arm/sunxi/README + * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ #include diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 6b301afffd11..8ba4f9c81dac 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -7,7 +7,7 @@ * * Core file which registers crypto algorithms supported by the SecuritySystem * - * You could find a link for the datasheet in Documentation/arm/sunxi/README + * You could find a link for the datasheet in Documentation/arm/sunxi.rst */ #include #include diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c index 97f0f8b23b5d..8a1e1b95b379 100644 --- a/drivers/media/v4l2-core/v4l2-fwnode.c +++ b/drivers/media/v4l2-core/v4l2-fwnode.c @@ -980,7 +980,7 @@ static int v4l2_fwnode_reference_parse(struct device *dev, * * THIS EXAMPLE EXISTS MERELY TO DOCUMENT THIS FUNCTION. DO NOT USE IT AS A * REFERENCE IN HOW ACPI TABLES SHOULD BE WRITTEN!! See documentation under - * Documentation/acpi/dsd instead and especially graph.txt, + * Documentation/firmware-guide/acpi/dsd/ instead and especially graph.txt, * data-node-references.txt and leds.txt . * * Scope (\_SB.PCI0.I2C2) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 428c7dde6b4b..fdd632c833b4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. 
- * For ARM: See Documentation/virt/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.rst */ union { __u32 irq; @@ -1107,7 +1107,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virt/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.rst. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 4b95f9a31a2f..e5f32fcec68f 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virt/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.rst */ union { __u32 irq; @@ -1100,7 +1100,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virt/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.rst. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) -- cgit v1.2.3 From 0c1bc6b84525b96aa9fb8f6fbe8c5cb26a5c0ead Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:37 +0200 Subject: docs: filesystems: fix renamed references Some filesystem references got broken by a previous patch series I submitted. Address those. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: David Sterba # fs/affs/Kconfig Link: https://lore.kernel.org/r/57318c53008dbda7f6f4a5a9e5787f4d37e8565a.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/ABI/stable/sysfs-devices-node | 2 +- Documentation/ABI/testing/procfs-smaps_rollup | 2 +- Documentation/admin-guide/cpu-load.rst | 2 +- Documentation/admin-guide/nfs/nfsroot.rst | 2 +- Documentation/driver-api/driver-model/device.rst | 4 ++-- Documentation/driver-api/driver-model/overview.rst | 2 +- Documentation/filesystems/dax.txt | 2 +- Documentation/filesystems/dnotify.txt | 2 +- Documentation/filesystems/ramfs-rootfs-initramfs.rst | 2 +- Documentation/filesystems/sysfs.rst | 2 +- Documentation/powerpc/firmware-assisted-dump.rst | 2 +- Documentation/process/adding-syscalls.rst | 2 +- Documentation/translations/it_IT/process/adding-syscalls.rst | 2 +- Documentation/translations/zh_CN/filesystems/sysfs.txt | 6 +++--- drivers/base/core.c | 2 +- drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 2 +- fs/Kconfig | 2 +- fs/Kconfig.binfmt | 2 +- fs/adfs/Kconfig | 2 +- fs/affs/Kconfig | 2 +- fs/afs/Kconfig | 6 +++--- fs/bfs/Kconfig | 2 +- fs/cramfs/Kconfig | 2 +- fs/ecryptfs/Kconfig | 2 +- fs/hfs/Kconfig | 2 +- fs/hpfs/Kconfig | 2 +- fs/isofs/Kconfig | 2 +- fs/namespace.c | 2 +- fs/notify/inotify/Kconfig | 2 +- fs/ntfs/Kconfig | 2 +- fs/ocfs2/Kconfig | 2 +- fs/proc/Kconfig | 4 ++-- fs/romfs/Kconfig | 2 +- fs/sysfs/dir.c | 2 +- fs/sysfs/file.c | 2 +- fs/sysfs/mount.c | 2 +- fs/sysfs/symlink.c | 2 +- fs/sysv/Kconfig | 2 +- fs/udf/Kconfig | 2 +- include/linux/kobject.h | 2 +- include/linux/kobject_ns.h | 2 +- include/linux/relay.h | 2 +- include/linux/sysfs.h | 2 +- kernel/relay.c | 2 +- lib/kobject.c | 4 ++-- 45 files changed, 52 insertions(+), 52 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node index df8413cf1468..484fc04bcc25 100644 --- 
a/Documentation/ABI/stable/sysfs-devices-node +++ b/Documentation/ABI/stable/sysfs-devices-node @@ -54,7 +54,7 @@ Date: October 2002 Contact: Linux Memory Management list Description: Provides information about the node's distribution and memory - utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.txt + utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.rst What: /sys/devices/system/node/nodeX/numastat Date: October 2002 diff --git a/Documentation/ABI/testing/procfs-smaps_rollup b/Documentation/ABI/testing/procfs-smaps_rollup index 274df44d8b1b..046978193368 100644 --- a/Documentation/ABI/testing/procfs-smaps_rollup +++ b/Documentation/ABI/testing/procfs-smaps_rollup @@ -11,7 +11,7 @@ Description: Additionally, the fields Pss_Anon, Pss_File and Pss_Shmem are not present in /proc/pid/smaps. These fields represent the sum of the Pss field of each type (anon, file, shmem). - For more details, see Documentation/filesystems/proc.txt + For more details, see Documentation/filesystems/proc.rst and the procfs man page. Typical output looks like this: diff --git a/Documentation/admin-guide/cpu-load.rst b/Documentation/admin-guide/cpu-load.rst index 2d01ce43d2a2..ebdecf864080 100644 --- a/Documentation/admin-guide/cpu-load.rst +++ b/Documentation/admin-guide/cpu-load.rst @@ -105,7 +105,7 @@ References ---------- - http://lkml.org/lkml/2007/2/12/6 -- Documentation/filesystems/proc.txt (1.8) +- Documentation/filesystems/proc.rst (1.8) Thanks diff --git a/Documentation/admin-guide/nfs/nfsroot.rst b/Documentation/admin-guide/nfs/nfsroot.rst index 82a4fda057f9..c6772075c80c 100644 --- a/Documentation/admin-guide/nfs/nfsroot.rst +++ b/Documentation/admin-guide/nfs/nfsroot.rst @@ -18,7 +18,7 @@ Mounting the root filesystem via NFS (nfsroot) In order to use a diskless system, such as an X-terminal or printer server for example, it is necessary for the root filesystem to be present on a non-disk device. 
This may be an initramfs (see -Documentation/filesystems/ramfs-rootfs-initramfs.txt), a ramdisk (see +Documentation/filesystems/ramfs-rootfs-initramfs.rst), a ramdisk (see Documentation/admin-guide/initrd.rst) or a filesystem mounted via NFS. The following text describes on how to use NFS for the root filesystem. For the rest of this text 'client' means the diskless system, and 'server' means the NFS diff --git a/Documentation/driver-api/driver-model/device.rst b/Documentation/driver-api/driver-model/device.rst index 2b868d49d349..b9b022371e85 100644 --- a/Documentation/driver-api/driver-model/device.rst +++ b/Documentation/driver-api/driver-model/device.rst @@ -50,10 +50,10 @@ Attributes Attributes of devices can be exported by a device driver through sysfs. -Please see Documentation/filesystems/sysfs.txt for more information +Please see Documentation/filesystems/sysfs.rst for more information on how sysfs works. -As explained in Documentation/kobject.txt, device attributes must be +As explained in Documentation/core-api/kobject.rst, device attributes must be created before the KOBJ_ADD uevent is generated. The only way to realize that is by defining an attribute group. diff --git a/Documentation/driver-api/driver-model/overview.rst b/Documentation/driver-api/driver-model/overview.rst index d4d1e9b40e0c..e98d0ab4a9b6 100644 --- a/Documentation/driver-api/driver-model/overview.rst +++ b/Documentation/driver-api/driver-model/overview.rst @@ -121,4 +121,4 @@ device-specific data or tunable interfaces. More information about the sysfs directory layout can be found in the other documents in this directory and in the file -Documentation/filesystems/sysfs.txt. +Documentation/filesystems/sysfs.rst. 
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt index 679729442fd2..735f3859b19f 100644 --- a/Documentation/filesystems/dax.txt +++ b/Documentation/filesystems/dax.txt @@ -74,7 +74,7 @@ are zeroed out and converted to written extents before being returned to avoid exposure of uninitialized data through mmap. These filesystems may be used for inspiration: -- ext2: see Documentation/filesystems/ext2.txt +- ext2: see Documentation/filesystems/ext2.rst - ext4: see Documentation/filesystems/ext4/ - xfs: see Documentation/admin-guide/xfs.rst diff --git a/Documentation/filesystems/dnotify.txt b/Documentation/filesystems/dnotify.txt index 15156883d321..08d575ece45d 100644 --- a/Documentation/filesystems/dnotify.txt +++ b/Documentation/filesystems/dnotify.txt @@ -67,4 +67,4 @@ See tools/testing/selftests/filesystems/dnotify_test.c for an example. NOTE ---- Beginning with Linux 2.6.13, dnotify has been replaced by inotify. -See Documentation/filesystems/inotify.txt for more information on it. +See Documentation/filesystems/inotify.rst for more information on it. diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.rst b/Documentation/filesystems/ramfs-rootfs-initramfs.rst index 6c576e241d86..3fddacc6bf14 100644 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.rst +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst @@ -71,7 +71,7 @@ be allowed write access to a ramfs mount. A ramfs derivative called tmpfs was created to add size limits, and the ability to write the data to swap space. Normal users can be allowed write access to -tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information. +tmpfs mounts. See Documentation/filesystems/tmpfs.rst for more information. What is rootfs? 
--------------- diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst index 290891c3fecb..ab0f7795792b 100644 --- a/Documentation/filesystems/sysfs.rst +++ b/Documentation/filesystems/sysfs.rst @@ -20,7 +20,7 @@ a means to export kernel data structures, their attributes, and the linkages between them to userspace. sysfs is tied inherently to the kobject infrastructure. Please read -Documentation/kobject.txt for more information concerning the kobject +Documentation/core-api/kobject.rst for more information concerning the kobject interface. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index b3f3ee135dbe..20ea8cdee0aa 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -344,7 +344,7 @@ Here is the list of files under powerpc debugfs: NOTE: - Please refer to Documentation/filesystems/debugfs.txt on + Please refer to Documentation/filesystems/debugfs.rst on how to mount the debugfs filesystem. diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index 1c3a840d06b9..a6b4a3a5bf3f 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -33,7 +33,7 @@ interface. to a somewhat opaque API. - If you're just exposing runtime system information, a new node in sysfs - (see ``Documentation/filesystems/sysfs.txt``) or the ``/proc`` filesystem may + (see ``Documentation/filesystems/sysfs.rst``) or the ``/proc`` filesystem may be more appropriate. However, access to these mechanisms requires that the relevant filesystem is mounted, which might not always be the case (e.g. in a namespaced/sandboxed/chrooted environment). 
Avoid adding any API to diff --git a/Documentation/translations/it_IT/process/adding-syscalls.rst b/Documentation/translations/it_IT/process/adding-syscalls.rst index c3a3439595a6..bff0a82bf127 100644 --- a/Documentation/translations/it_IT/process/adding-syscalls.rst +++ b/Documentation/translations/it_IT/process/adding-syscalls.rst @@ -39,7 +39,7 @@ vostra interfaccia. un qualche modo opaca. - Se dovete esporre solo delle informazioni sul sistema, un nuovo nodo in - sysfs (vedere ``Documentation/filesystems/sysfs.txt``) o + sysfs (vedere ``Documentation/filesystems/sysfs.rst``) o in procfs potrebbe essere sufficiente. Tuttavia, l'accesso a questi meccanismi richiede che il filesystem sia montato, il che potrebbe non essere sempre vero (per esempio, in ambienti come namespace/sandbox/chroot). diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt index a15c3ebdfa82..fcf620049d11 100644 --- a/Documentation/translations/zh_CN/filesystems/sysfs.txt +++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/filesystems/sysfs.txt +Chinese translated version of Documentation/filesystems/sysfs.rst If you have any comment or update to the content, please contact the original document maintainer directly. 
However, if you have a problem @@ -10,7 +10,7 @@ Maintainer: Patrick Mochel Mike Murphy Chinese maintainer: Fu Wei --------------------------------------------------------------------- -Documentation/filesystems/sysfs.txt 的中文翻译 +Documentation/filesystems/sysfs.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -40,7 +40,7 @@ sysfs 是一个最初基于 ramfs 且位于内存的文件系统。它提供导 数据结构及其属性,以及它们之间的关联到用户空间的方法。 sysfs 始终与 kobject 的底层结构紧密相关。请阅读 -Documentation/kobject.txt 文档以获得更多关于 kobject 接口的 +Documentation/core-api/kobject.rst 文档以获得更多关于 kobject 接口的 信息。 diff --git a/drivers/base/core.c b/drivers/base/core.c index 139cdf7e7327..89fcc0a09f94 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1374,7 +1374,7 @@ static void device_release(struct kobject *kobj) else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); else - WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n", + WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", dev_name(dev)); kfree(p); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index 211f5de99a44..9aba2910d83a 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -170,7 +170,7 @@ struct dpu_global_state * * Main debugfs documentation is located at, * - * Documentation/filesystems/debugfs.txt + * Documentation/filesystems/debugfs.rst * * @dpu_debugfs_setup_regset32: Initialize data for dpu_debugfs_create_regset32 * @dpu_debugfs_create_regset32: Create 32-bit register dump file diff --git a/fs/Kconfig b/fs/Kconfig index f08fbbfafd9a..d1ad3935fb85 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -166,7 +166,7 @@ config TMPFS space. If you unmount a tmpfs instance, everything stored therein is lost. - See for details. + See for details. 
config TMPFS_POSIX_ACL bool "Tmpfs POSIX Access Control Lists" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 62dc4f577ba1..3fbbd54f50fd 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -72,7 +72,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS The core dump behavior can be controlled per process using the /proc/PID/coredump_filter pseudo-file; this setting is - inherited. See Documentation/filesystems/proc.txt for details. + inherited. See Documentation/filesystems/proc.rst for details. This config option changes the default setting of coredump_filter seen at boot time. If unsure, say Y. diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig index df4650dccf68..44738fed6625 100644 --- a/fs/adfs/Kconfig +++ b/fs/adfs/Kconfig @@ -12,7 +12,7 @@ config ADFS_FS The ADFS partition should be the first partition (i.e., /dev/[hs]d?1) on each of your drives. Please read the file - for further details. + for further details. To compile this code as a module, choose M here: the module will be called adfs. diff --git a/fs/affs/Kconfig b/fs/affs/Kconfig index 84c46b9025c5..eb9d0ab850cb 100644 --- a/fs/affs/Kconfig +++ b/fs/affs/Kconfig @@ -9,7 +9,7 @@ config AFFS_FS FFS partition on your hard drive. Amiga floppies however cannot be read with this driver due to an incompatibility of the floppy controller used in an Amiga and the standard floppy controller in - PCs and workstations. Read + PCs and workstations. Read and . With this driver you can also mount disk files used by Bernd diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index 3fb1f559e317..1ad211d72b3b 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig @@ -8,7 +8,7 @@ config AFS_FS If you say Y here, you will get an experimental Andrew File System driver. It currently only supports unsecured read-only AFS access. - See for more information. + See for more information. If unsure, say N. @@ -18,7 +18,7 @@ config AFS_DEBUG help Say Y here to make runtime controllable debugging messages appear. - See for more information. 
+ See for more information. If unsure, say N. @@ -37,6 +37,6 @@ config AFS_DEBUG_CURSOR the dmesg log if the server rotation algorithm fails to successfully contact a server. - See for more information. + See for more information. If unsure, say N. diff --git a/fs/bfs/Kconfig b/fs/bfs/Kconfig index 3e1247f07913..3a757805b585 100644 --- a/fs/bfs/Kconfig +++ b/fs/bfs/Kconfig @@ -11,7 +11,7 @@ config BFS_FS on your /stand slice from within Linux. You then also need to say Y to "UnixWare slices support", below. More information about the BFS file system is contained in the file - . + . If you don't know what this is about, say N. diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index c8bebb70a971..d98cef0dbb6b 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -9,7 +9,7 @@ config CRAMFS limited to 256MB file systems (with 16MB files), and doesn't support 16/32 bits uid/gid, hard links and timestamps. - See and + See and for further information. To compile this as a module, choose M here: the module will be called diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig index 522c35d5292b..1bdeaa6d5790 100644 --- a/fs/ecryptfs/Kconfig +++ b/fs/ecryptfs/Kconfig @@ -7,7 +7,7 @@ config ECRYPT_FS select CRYPTO_MD5 help Encrypted filesystem that operates on the VFS layer. See - to learn more about + to learn more about eCryptfs. Userspace components are required and can be obtained from . diff --git a/fs/hfs/Kconfig b/fs/hfs/Kconfig index 44f6e89bcb75..129926b5142d 100644 --- a/fs/hfs/Kconfig +++ b/fs/hfs/Kconfig @@ -6,7 +6,7 @@ config HFS_FS help If you say Y here, you will be able to mount Macintosh-formatted floppy disks and hard drive partitions with full read-write access. - Please read to learn about + Please read to learn about the available mount options. 
To compile this file system support as a module, choose M here: the diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig index 56aa0336254a..2b36dc6f0a10 100644 --- a/fs/hpfs/Kconfig +++ b/fs/hpfs/Kconfig @@ -9,7 +9,7 @@ config HPFS_FS write files to an OS/2 HPFS partition on your hard drive. OS/2 floppies however are in regular MSDOS format, so you don't need this option in order to be able to read them. Read - . + . To compile this file system support as a module, choose M here: the module will be called hpfs. If unsure, say N. diff --git a/fs/isofs/Kconfig b/fs/isofs/Kconfig index 5e7419599f50..08ffd37b9bb8 100644 --- a/fs/isofs/Kconfig +++ b/fs/isofs/Kconfig @@ -8,7 +8,7 @@ config ISO9660_FS long Unix filenames and symbolic links are also supported by this driver. If you have a CD-ROM drive and want to do more with it than just listen to audio CDs and watch its LEDs, say Y (and read - and the CD-ROM-HOWTO, + and the CD-ROM-HOWTO, available from ), thereby enlarging your kernel by about 27 KB; otherwise say N. diff --git a/fs/namespace.c b/fs/namespace.c index a28e4db075ed..5f036dc711b6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3595,7 +3595,7 @@ EXPORT_SYMBOL(path_is_under); * file system may be mounted on put_old. After all, new_root is a mountpoint. * * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. - * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives + * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives * in this situation. * * Notes: diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index 6736e47d94d8..7715fadd5fff 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig @@ -12,6 +12,6 @@ config INOTIFY_USER new features including multiple file events, one-shot support, and unmount notification. - For more information, see + For more information, see If unsure, say Y. 
diff --git a/fs/ntfs/Kconfig b/fs/ntfs/Kconfig index de9fb5cff226..1667a7e590d8 100644 --- a/fs/ntfs/Kconfig +++ b/fs/ntfs/Kconfig @@ -18,7 +18,7 @@ config NTFS_FS the Linux 2.4 kernel series is separately available as a patch from the project web site. - For more information see + For more information see and . To compile this file system support as a module, choose M here: the diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig index 46bba20da6b5..1177c33df895 100644 --- a/fs/ocfs2/Kconfig +++ b/fs/ocfs2/Kconfig @@ -21,7 +21,7 @@ config OCFS2_FS OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ For more information on OCFS2, see the file - . + . config OCFS2_FS_O2CB tristate "O2CB Kernelspace Clustering" diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 27ef84d99f59..971a42f6357d 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -23,7 +23,7 @@ config PROC_FS /proc" or the equivalent line in /etc/fstab does the job. The /proc file system is explained in the file - and on the proc(5) manpage + and on the proc(5) manpage ("man 5 proc"). This option will enlarge your kernel by about 67 KB. Several @@ -95,7 +95,7 @@ config PROC_CHILDREN default n help Provides a fast way to retrieve first level children pids of a task. See - for more information. + for more information. Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. diff --git a/fs/romfs/Kconfig b/fs/romfs/Kconfig index ad4c45788896..9737b8e68878 100644 --- a/fs/romfs/Kconfig +++ b/fs/romfs/Kconfig @@ -6,7 +6,7 @@ config ROMFS_FS This is a very small read-only file system mainly intended for initial ram disks of installation disks, but it could be used for other read-only media as well. Read - for details. + for details. To compile this file system support as a module, choose M here: the module will be called romfs. 
Note that the file system of your diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aa85f2874a9f..59dffd5ca517 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -6,7 +6,7 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo * - * Please see Documentation/filesystems/sysfs.txt for more information. + * Please see Documentation/filesystems/sysfs.rst for more information. */ #define pr_fmt(fmt) "sysfs: " fmt diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 26bbf960e2a2..f275fcda62fb 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -6,7 +6,7 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo * - * Please see Documentation/filesystems/sysfs.txt for more information. + * Please see Documentation/filesystems/sysfs.rst for more information. */ #include diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index db81cfbab9d6..e747c135c1d1 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -6,7 +6,7 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo * - * Please see Documentation/filesystems/sysfs.txt for more information. + * Please see Documentation/filesystems/sysfs.rst for more information. */ #include diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index c4deecc80f67..5603530a1a52 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -6,7 +6,7 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo * - * Please see Documentation/filesystems/sysfs.txt for more information. + * Please see Documentation/filesystems/sysfs.rst for more information. */ #include diff --git a/fs/sysv/Kconfig b/fs/sysv/Kconfig index d4edf7d9ae10..b4e23e03fbeb 100644 --- a/fs/sysv/Kconfig +++ b/fs/sysv/Kconfig @@ -28,7 +28,7 @@ config SYSV_FS tar" or preferably "info tar"). Note also that this option has nothing whatsoever to do with the option "System V IPC". Read about the System V file system in - . + . Saying Y here will enlarge your kernel by about 27 KB. 
To compile this as a module, choose M here: the module will be called diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig index 6848de581ce1..26e1a49f3ba7 100644 --- a/fs/udf/Kconfig +++ b/fs/udf/Kconfig @@ -9,7 +9,7 @@ config UDF_FS compatible with standard unix file systems, it is also suitable for removable USB disks. Say Y if you intend to mount DVD discs or CDRW's written in packet mode, or if you want to use UDF for removable USB - disks. Please read . + disks. Please read . To compile this file system support as a module, choose M here: the module will be called udf. diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e2ca0a292e21..fc8d83e91379 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -7,7 +7,7 @@ * Copyright (c) 2006-2008 Greg Kroah-Hartman * Copyright (c) 2006-2008 Novell Inc. * - * Please read Documentation/kobject.txt before using the kobject + * Please read Documentation/core-api/kobject.rst before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. */ diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 069aa2ebef90..2b5b64256cf4 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -8,7 +8,7 @@ * * Split from kobject.h by David Howells (dhowells@redhat.com) * - * Please read Documentation/kobject.txt before using the kobject + * Please read Documentation/core-api/kobject.rst before using the kobject * interface, ESPECIALLY the parts about reference counts and object * destructors. */ diff --git a/include/linux/relay.h b/include/linux/relay.h index c759f96e39c1..e13a333e7c37 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -141,7 +141,7 @@ struct rchan_callbacks * cause relay_open() to create a single global buffer rather * than the default set of per-cpu buffers. * - * See Documentation/filesystems/relay.txt for more info. + * See Documentation/filesystems/relay.rst for more info. 
*/ struct dentry *(*create_buf_file)(const char *filename, struct dentry *parent, diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 80bb865b3a33..86067dbe7745 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -7,7 +7,7 @@ * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007 Tejun Heo * - * Please see Documentation/filesystems/sysfs.txt for more information. + * Please see Documentation/filesystems/sysfs.rst for more information. */ #ifndef _SYSFS_H_ diff --git a/kernel/relay.c b/kernel/relay.c index ade14fb7ce2e..d0c9c287680a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1,7 +1,7 @@ /* * Public API and common code for kernel->userspace relay file support. * - * See Documentation/filesystems/relay.txt for an overview. + * See Documentation/filesystems/relay.rst for an overview. * * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) diff --git a/lib/kobject.c b/lib/kobject.c index 83198cb37d8d..65fa7bf70c57 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -6,7 +6,7 @@ * Copyright (c) 2006-2007 Greg Kroah-Hartman * Copyright (c) 2006-2007 Novell Inc. * - * Please see the file Documentation/kobject.txt for critical information + * Please see the file Documentation/core-api/kobject.rst for critical information * about using the kobject interface. */ @@ -670,7 +670,7 @@ static void kobject_cleanup(struct kobject *kobj) kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) - pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n", + pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed. 
See Documentation/core-api/kobject.rst.\n", kobject_name(kobj), kobj); /* send "remove" if the caller did not do it but sent "add" */ -- cgit v1.2.3 From d91589556b6a096d53f47ed3d91172a2788d4cdb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:38 +0200 Subject: docs: amu: supress some Sphinx warnings Add extra blank lines on some places, in order to avoid those warnings when building the docs: Documentation/arm64/amu.rst:26: WARNING: Unexpected indentation. Documentation/arm64/amu.rst:60: WARNING: Unexpected indentation. Documentation/arm64/amu.rst:81: WARNING: Unexpected indentation. Documentation/arm64/amu.rst:108: WARNING: Unexpected indentation. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/ab0881638fc41ed790b3307a8e022ec84b7cce7e.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm64/amu.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/arm64/amu.rst b/Documentation/arm64/amu.rst index 5057b11100ed..452ec8b115c2 100644 --- a/Documentation/arm64/amu.rst +++ b/Documentation/arm64/amu.rst @@ -23,6 +23,7 @@ optional external memory-mapped interface. Version 1 of the Activity Monitors architecture implements a counter group of four fixed and architecturally defined 64-bit event counters. + - CPU cycle counter: increments at the frequency of the CPU. - Constant counter: increments at the fixed frequency of the system clock. @@ -57,6 +58,7 @@ counters, only the presence of the extension. Firmware (code running at higher exception levels, e.g. arm-tf) support is needed to: + - Enable access for lower exception levels (EL2 and EL1) to the AMU registers. - Enable the counters. If not enabled these will read as 0. @@ -78,6 +80,7 @@ are not trapped in EL2/EL3. 
The fixed counters of AMUv1 are accessible though the following system register definitions: + - SYS_AMEVCNTR0_CORE_EL0 - SYS_AMEVCNTR0_CONST_EL0 - SYS_AMEVCNTR0_INST_RET_EL0 @@ -93,6 +96,7 @@ Userspace access ---------------- Currently, access from userspace to the AMU registers is disabled due to: + - Security reasons: they might expose information about code executed in secure mode. - Purpose: AMU counters are intended for system management use. @@ -105,6 +109,7 @@ Virtualization Currently, access from userspace (EL0) and kernelspace (EL1) on the KVM guest side is disabled due to: + - Security reasons: they might expose information about code executed by other guests or the host. -- cgit v1.2.3 From 877a37d31e0f9f97d04f8d211924dc16df74d31a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:39 +0200 Subject: docs: arm64: booting.rst: get rid of some warnings Get rid of those warnings: Documentation/arm64/booting.rst:253: WARNING: Unexpected indentation. Documentation/arm64/booting.rst:259: WARNING: Block quote ends without a blank line; unexpected unindent. By adding an extra blank lines where needed. While here, use list markups on some places, as otherwise Sphinx will consider the next lines as continuation of the privious ones. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/121b267be0a102fde73498c31792e5a9309013cc.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/arm64/booting.rst | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index a3f1a47b6f1c..e50186092948 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -173,7 +173,9 @@ Before jumping into the kernel, the following conditions must be met: - Caches, MMUs The MMU must be off. + Instruction cache may be on or off. 
+ The address range corresponding to the loaded kernel image must be cleaned to the PoC. In the presence of a system cache or other coherent masters with caches enabled, this will typically require @@ -238,6 +240,7 @@ Before jumping into the kernel, the following conditions must be met: - The DT or ACPI tables must describe a GICv2 interrupt controller. For CPUs with pointer authentication functionality: + - If EL3 is present: - SCR_EL3.APK (bit 16) must be initialised to 0b1 @@ -249,18 +252,22 @@ Before jumping into the kernel, the following conditions must be met: - HCR_EL2.API (bit 41) must be initialised to 0b1 For CPUs with Activity Monitors Unit v1 (AMUv1) extension present: + - If EL3 is present: - CPTR_EL3.TAM (bit 30) must be initialised to 0b0 - CPTR_EL2.TAM (bit 30) must be initialised to 0b0 - AMCNTENSET0_EL0 must be initialised to 0b1111 - AMCNTENSET1_EL0 must be initialised to a platform specific value - having 0b1 set for the corresponding bit for each of the auxiliary - counters present. + + - CPTR_EL3.TAM (bit 30) must be initialised to 0b0 + - CPTR_EL2.TAM (bit 30) must be initialised to 0b0 + - AMCNTENSET0_EL0 must be initialised to 0b1111 + - AMCNTENSET1_EL0 must be initialised to a platform specific value + having 0b1 set for the corresponding bit for each of the auxiliary + counters present. + - If the kernel is entered at EL1: - AMCNTENSET0_EL0 must be initialised to 0b1111 - AMCNTENSET1_EL0 must be initialised to a platform specific value - having 0b1 set for the corresponding bit for each of the auxiliary - counters present. + + - AMCNTENSET0_EL0 must be initialised to 0b1111 + - AMCNTENSET1_EL0 must be initialised to a platform specific value + having 0b1 set for the corresponding bit for each of the auxiliary + counters present. The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. 
All CPUs must @@ -304,7 +311,8 @@ following manner: Documentation/devicetree/bindings/arm/psci.yaml. - Secondary CPU general-purpose register settings - x0 = 0 (reserved for future use) - x1 = 0 (reserved for future use) - x2 = 0 (reserved for future use) - x3 = 0 (reserved for future use) + + - x0 = 0 (reserved for future use) + - x1 = 0 (reserved for future use) + - x2 = 0 (reserved for future use) + - x3 = 0 (reserved for future use) -- cgit v1.2.3 From a588332fba0b13d5090537812865f3170fdf19c2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:40 +0200 Subject: docs: pci: boot-interrupts.rst: improve html output There are some warnings with this file: /Documentation/PCI/boot-interrupts.rst:42: WARNING: Unexpected indentation. /Documentation/PCI/boot-interrupts.rst:52: WARNING: Block quote ends without a blank line; unexpected unindent. /Documentation/PCI/boot-interrupts.rst:92: WARNING: Unexpected indentation. /Documentation/PCI/boot-interrupts.rst:98: WARNING: Unexpected indentation. /Documentation/PCI/boot-interrupts.rst:136: WARNING: Unexpected indentation. It turns that this file conversion to ReST could be improved, in order to remove the warnings and provide a better output. So, fix the warnings by adjusting blank lines, add a table and some list markups. Also, mark endnodes as such. 
Acked-by: Bjorn Helgaas Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/a6a9eb16eede10731bcce69a600ab12d92e6ba47.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/PCI/boot-interrupts.rst | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/Documentation/PCI/boot-interrupts.rst b/Documentation/PCI/boot-interrupts.rst index d078ef3eb192..2ec70121bfca 100644 --- a/Documentation/PCI/boot-interrupts.rst +++ b/Documentation/PCI/boot-interrupts.rst @@ -32,12 +32,13 @@ interrupt goes unhandled over time, they are tracked by the Linux kernel as Spurious Interrupts. The IRQ will be disabled by the Linux kernel after it reaches a specific count with the error "nobody cared". This disabled IRQ now prevents valid usage by an existing interrupt which may happen to share -the IRQ line. +the IRQ line:: irq 19: nobody cared (try booting with the "irqpoll" option) CPU: 0 PID: 2988 Comm: irq/34-nipalk Tainted: 4.14.87-rt49-02410-g4a640ec-dirty #1 Hardware name: National Instruments NI PXIe-8880/NI PXIe-8880, BIOS 2.1.5f1 01/09/2020 Call Trace: + ? dump_stack+0x46/0x5e ? __report_bad_irq+0x2e/0xb0 @@ -85,15 +86,18 @@ Mitigations The mitigations take the form of PCI quirks. The preference has been to first identify and make use of a means to disable the routing to the PCH. In such a case a quirk to disable boot interrupt generation can be -added.[1] +added. [1]_ - Intel® 6300ESB I/O Controller Hub +Intel® 6300ESB I/O Controller Hub Alternate Base Address Register: BIE: Boot Interrupt Enable - 0 = Boot interrupt is enabled. - 1 = Boot interrupt is disabled. - Intel® Sandy Bridge through Sky Lake based Xeon servers: + == =========================== + 0 Boot interrupt is enabled. + 1 Boot interrupt is disabled. 
+ == =========================== + +Intel® Sandy Bridge through Sky Lake based Xeon servers: Coherent Interface Protocol Interrupt Control dis_intx_route2pch/dis_intx_route2ich/dis_intx_route2dmi2: When this bit is set. Local INTx messages received from the @@ -109,12 +113,12 @@ line by default. Therefore, on chipsets where this INTx routing cannot be disabled, the Linux kernel will reroute the valid interrupt to its legacy interrupt. This redirection of the handler will prevent the occurrence of the spurious interrupt detection which would ordinarily disable the IRQ -line due to excessive unhandled counts.[2] +line due to excessive unhandled counts. [2]_ The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS exists to enable (or disable) the redirection of the interrupt handler to the PCH interrupt line. The option can be overridden by either pci=ioapicreroute or -pci=noioapicreroute.[3] +pci=noioapicreroute. [3]_ More Documentation @@ -127,19 +131,19 @@ into the evolution of its handling with chipsets. 
Example of disabling of the boot interrupt ------------------------------------------ -Intel® 6300ESB I/O Controller Hub (Document # 300641-004US) + - Intel® 6300ESB I/O Controller Hub (Document # 300641-004US) 5.7.3 Boot Interrupt https://www.intel.com/content/dam/doc/datasheet/6300esb-io-controller-hub-datasheet.pdf -Intel® Xeon® Processor E5-1600/2400/2600/4600 v3 Product Families -Datasheet - Volume 2: Registers (Document # 330784-003) + - Intel® Xeon® Processor E5-1600/2400/2600/4600 v3 Product Families + Datasheet - Volume 2: Registers (Document # 330784-003) 6.6.41 cipintrc Coherent Interface Protocol Interrupt Control https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e5-v3-datasheet-vol-2.pdf Example of handler rerouting ---------------------------- -Intel® 6700PXH 64-bit PCI Hub (Document # 302628) + - Intel® 6700PXH 64-bit PCI Hub (Document # 302628) 2.15.2 PCI Express Legacy INTx Support and Boot Interrupt https://www.intel.com/content/dam/doc/datasheet/6700pxh-64-bit-pci-hub-datasheet.pdf @@ -150,6 +154,6 @@ Cheers, Sean V Kelley sean.v.kelley@linux.intel.com -[1] https://lore.kernel.org/r/12131949181903-git-send-email-sassmann@suse.de/ -[2] https://lore.kernel.org/r/12131949182094-git-send-email-sassmann@suse.de/ -[3] https://lore.kernel.org/r/487C8EA7.6020205@suse.de/ +.. [1] https://lore.kernel.org/r/12131949181903-git-send-email-sassmann@suse.de/ +.. [2] https://lore.kernel.org/r/12131949182094-git-send-email-sassmann@suse.de/ +.. [3] https://lore.kernel.org/r/487C8EA7.6020205@suse.de/ -- cgit v1.2.3 From cfa204984d57ef205b09c7f6db9b562821b51dbd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:41 +0200 Subject: docs: ras: get rid of some warnings Sphinx produce some warnings due to a bad table format: Documentation/admin-guide/ras.rst:358: WARNING: Definition list ends without a blank line; unexpected unindent. 
Documentation/admin-guide/ras.rst:358: WARNING: Definition list ends without a blank line; unexpected unindent. Documentation/admin-guide/ras.rst:363: WARNING: Definition list ends without a blank line; unexpected unindent. Documentation/admin-guide/ras.rst:363: WARNING: Definition list ends without a blank line; unexpected unindent. Rearrange the things there in order to supress the warnings while being precise at the Sphinx output about how ranks are mapped into csrows. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9e1bb44d6dbedb5b6f049d081b47da1f9620de16.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/ras.rst | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst index 0310db624964..22b31bc7e129 100644 --- a/Documentation/admin-guide/ras.rst +++ b/Documentation/admin-guide/ras.rst @@ -351,15 +351,17 @@ controllers. 
The following example will assume 2 channels: +------------+-----------+-----------+ | | ``ch0`` | ``ch1`` | +============+===========+===========+ - | ``csrow0`` | DIMM_A0 | DIMM_B0 | - | | rank0 | rank0 | - +------------+ - | - | + | |**DIMM_A0**|**DIMM_B0**| + +------------+-----------+-----------+ + | ``csrow0`` | rank0 | rank0 | + +------------+-----------+-----------+ | ``csrow1`` | rank1 | rank1 | +------------+-----------+-----------+ - | ``csrow2`` | DIMM_A1 | DIMM_B1 | - | | rank0 | rank0 | - +------------+ - | - | - | ``csrow3`` | rank1 | rank1 | + | |**DIMM_A1**|**DIMM_B1**| + +------------+-----------+-----------+ + | ``csrow2`` | rank0 | rank0 | + +------------+-----------+-----------+ + | ``csrow3`` | rank1 | rank1 | +------------+-----------+-----------+ In the above example, there are 4 physical slots on the motherboard -- cgit v1.2.3 From 00aff95659616a131ae4b461aa21e429c33907b5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:42 +0200 Subject: docs: ras: don't need to repeat twice the same thing We don't need to say twice "for the first time" at the same paragraph. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f76dcae96de8b1bb8ee37a79781c111c825e26d6.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/ras.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst index 22b31bc7e129..6cbaab975ee5 100644 --- a/Documentation/admin-guide/ras.rst +++ b/Documentation/admin-guide/ras.rst @@ -212,7 +212,7 @@ EDAC - Error Detection And Correction purposes. When the subsystem was pushed upstream for the first time, on - Kernel 2.6.16, for the first time, it was renamed to ``EDAC``. + Kernel 2.6.16, it was renamed to ``EDAC``. 
Purpose ------- -- cgit v1.2.3 From ad89c8852fde7ab0c4007f5b753517cf508d12be Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:44 +0200 Subject: docs: spi: spi.h: fix a doc building warning We need to add a blank line to avoid this warning: ./include/linux/spi/spi.h:401: WARNING: Unexpected indentation. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1c701b3ac903dc0bc304dca958fbdee53bd38dc3.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/linux/spi/spi.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 38286de779e3..aac57b5b7c21 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -394,6 +394,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * for example doing DMA mapping. Called from threaded * context. * @transfer_one: transfer a single spi_transfer. + * * - return 0 if the transfer is finished, * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must -- cgit v1.2.3 From f08252469ef5b10863424e502bb1ed049d18390c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:45 +0200 Subject: docs: drivers: fix some warnings at base/platform.c when building docs Currrently, two warnings are generated when building docs: ./drivers/base/platform.c:136: WARNING: Unexpected indentation. ./drivers/base/platform.c:214: WARNING: Unexpected indentation. As examples are code blocks, they should use "::" markup. However, Example:: Is currently interpreted as a new section. 
While we could fix kernel-doc to accept such new syntax, it is easier to just replace it with: For Example:: Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/564273815a76136fb5e453969b1012a786d99e28.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/base/platform.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 5255550b7c34..a07e28eab7d7 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -147,7 +147,8 @@ EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource_byname); * request_irq() APIs. This is the same as platform_get_irq(), except that it * does not print an error message if an IRQ can not be obtained. * - * Example: + * For example:: + * * int irq = platform_get_irq_optional(pdev, 0); * if (irq < 0) * return irq; @@ -226,7 +227,8 @@ EXPORT_SYMBOL_GPL(platform_get_irq_optional); * IRQ fails. Device drivers should check the return value for errors so as to * not pass a negative integer value to the request_irq() APIs. * - * Example: + * For example:: + * * int irq = platform_get_irq(pdev, 0); * if (irq < 0) * return irq; -- cgit v1.2.3 From 14a7e51ff184281a8c02c1cf803df15fd1ac19c8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:46 +0200 Subject: docs: mm: userfaultfd.rst: use ``foo`` for literals Several parts of this document define literals: ioctl names, function calls, directory patches, etc. Mark those as literal blocks, in order to improve its readability (both at text mode and after parsed by Sphinx. This fixes those two warnings: Documentation/admin-guide/mm/userfaultfd.rst:139: WARNING: Inline emphasis start-string without end-string. Documentation/admin-guide/mm/userfaultfd.rst:139: WARNING: Inline emphasis start-string without end-string. produced during documentation build. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/2ae061761baf8fe00cdf8a7e6dae293756849a05.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/userfaultfd.rst | 207 ++++++++++++++------------- 1 file changed, 104 insertions(+), 103 deletions(-) diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst index c30176e67900..740d111faf1c 100644 --- a/Documentation/admin-guide/mm/userfaultfd.rst +++ b/Documentation/admin-guide/mm/userfaultfd.rst @@ -12,107 +12,107 @@ and more generally they allow userland to take control of various memory page faults, something otherwise only the kernel code could do. For example userfaults allows a proper and more optimal implementation -of the PROT_NONE+SIGSEGV trick. +of the ``PROT_NONE+SIGSEGV`` trick. Design ====== -Userfaults are delivered and resolved through the userfaultfd syscall. +Userfaults are delivered and resolved through the ``userfaultfd`` syscall. 
-The userfaultfd (aside from registering and unregistering virtual +The ``userfaultfd`` (aside from registering and unregistering virtual memory ranges) provides two primary functionalities: -1) read/POLLIN protocol to notify a userland thread of the faults +1) ``read/POLLIN`` protocol to notify a userland thread of the faults happening -2) various UFFDIO_* ioctls that can manage the virtual memory regions - registered in the userfaultfd that allows userland to efficiently +2) various ``UFFDIO_*`` ioctls that can manage the virtual memory regions + registered in the ``userfaultfd`` that allows userland to efficiently resolve the userfaults it receives via 1) or to manage the virtual memory in the background The real advantage of userfaults if compared to regular virtual memory management of mremap/mprotect is that the userfaults in all their operations never involve heavyweight structures like vmas (in fact the -userfaultfd runtime load never takes the mmap_sem for writing). +``userfaultfd`` runtime load never takes the mmap_sem for writing). Vmas are not suitable for page- (or hugepage) granular fault tracking when dealing with virtual address spaces that could span Terabytes. Too many vmas would be needed for that. -The userfaultfd once opened by invoking the syscall, can also be +The ``userfaultfd`` once opened by invoking the syscall, can also be passed using unix domain sockets to a manager process, so the same manager process could handle the userfaults of a multitude of different processes without them being aware about what is going on -(well of course unless they later try to use the userfaultfd +(well of course unless they later try to use the ``userfaultfd`` themselves on the same region the manager is already tracking, which -is a corner case that would currently return -EBUSY). +is a corner case that would currently return ``-EBUSY``). 
API === -When first opened the userfaultfd must be enabled invoking the -UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or -a later API version) which will specify the read/POLLIN protocol -userland intends to speak on the UFFD and the uffdio_api.features -userland requires. The UFFDIO_API ioctl if successful (i.e. if the -requested uffdio_api.api is spoken also by the running kernel and the +When first opened the ``userfaultfd`` must be enabled invoking the +``UFFDIO_API`` ioctl specifying a ``uffdio_api.api`` value set to ``UFFD_API`` (or +a later API version) which will specify the ``read/POLLIN`` protocol +userland intends to speak on the ``UFFD`` and the ``uffdio_api.features`` +userland requires. The ``UFFDIO_API`` ioctl if successful (i.e. if the +requested ``uffdio_api.api`` is spoken also by the running kernel and the requested features are going to be enabled) will return into -uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of +``uffdio_api.features`` and ``uffdio_api.ioctls`` two 64bit bitmasks of respectively all the available features of the read(2) protocol and the generic ioctl available. -The uffdio_api.features bitmask returned by the UFFDIO_API ioctl -defines what memory types are supported by the userfaultfd and what +The ``uffdio_api.features`` bitmask returned by the ``UFFDIO_API`` ioctl +defines what memory types are supported by the ``userfaultfd`` and what events, except page fault notifications, may be generated. -If the kernel supports registering userfaultfd ranges on hugetlbfs -virtual memory areas, UFFD_FEATURE_MISSING_HUGETLBFS will be set in -uffdio_api.features. Similarly, UFFD_FEATURE_MISSING_SHMEM will be -set if the kernel supports registering userfaultfd ranges on shared -memory (covering all shmem APIs, i.e. tmpfs, IPCSHM, /dev/zero -MAP_SHARED, memfd_create, etc). 
+If the kernel supports registering ``userfaultfd`` ranges on hugetlbfs +virtual memory areas, ``UFFD_FEATURE_MISSING_HUGETLBFS`` will be set in +``uffdio_api.features``. Similarly, ``UFFD_FEATURE_MISSING_SHMEM`` will be +set if the kernel supports registering ``userfaultfd`` ranges on shared +memory (covering all shmem APIs, i.e. tmpfs, ``IPCSHM``, ``/dev/zero``, +``MAP_SHARED``, ``memfd_create``, etc). -The userland application that wants to use userfaultfd with hugetlbfs +The userland application that wants to use ``userfaultfd`` with hugetlbfs or shared memory need to set the corresponding flag in -uffdio_api.features to enable those features. +``uffdio_api.features`` to enable those features. If the userland desires to receive notifications for events other than -page faults, it has to verify that uffdio_api.features has appropriate -UFFD_FEATURE_EVENT_* bits set. These events are described in more +page faults, it has to verify that ``uffdio_api.features`` has appropriate +``UFFD_FEATURE_EVENT_*`` bits set. These events are described in more detail below in "Non-cooperative userfaultfd" section. -Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should -be invoked (if present in the returned uffdio_api.ioctls bitmask) to -register a memory range in the userfaultfd by setting the -uffdio_register structure accordingly. The uffdio_register.mode +Once the ``userfaultfd`` has been enabled the ``UFFDIO_REGISTER`` ioctl should +be invoked (if present in the returned ``uffdio_api.ioctls`` bitmask) to +register a memory range in the ``userfaultfd`` by setting the +uffdio_register structure accordingly. The ``uffdio_register.mode`` bitmask will specify to the kernel which kind of faults to track for -the range (UFFDIO_REGISTER_MODE_MISSING would track missing -pages). The UFFDIO_REGISTER ioctl will return the -uffdio_register.ioctls bitmask of ioctls that are suitable to resolve +the range (``UFFDIO_REGISTER_MODE_MISSING`` would track missing +pages). 
The ``UFFDIO_REGISTER`` ioctl will return the +``uffdio_register.ioctls`` bitmask of ioctls that are suitable to resolve userfaults on the range registered. Not all ioctls will necessarily be supported for all memory types depending on the underlying virtual memory backend (anonymous memory vs tmpfs vs real filebacked mappings). -Userland can use the uffdio_register.ioctls to manage the virtual +Userland can use the ``uffdio_register.ioctls`` to manage the virtual address space in the background (to add or potentially also remove -memory from the userfaultfd registered range). This means a userfault +memory from the ``userfaultfd`` registered range). This means a userfault could be triggering just before userland maps in the background the user-faulted page. -The primary ioctl to resolve userfaults is UFFDIO_COPY. That +The primary ioctl to resolve userfaults is ``UFFDIO_COPY``. That atomically copies a page into the userfault registered range and wakes -up the blocked userfaults (unless uffdio_copy.mode & -UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctl works similarly to -UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an -half copied page since it'll keep userfaulting until the copy has -finished. +up the blocked userfaults +(unless ``uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE`` is set). +Other ioctl works similarly to ``UFFDIO_COPY``. They're atomic as in +guaranteeing that nothing can see an half copied page since it'll +keep userfaulting until the copy has finished. Notes: -- If you requested UFFDIO_REGISTER_MODE_MISSING when registering then +- If you requested ``UFFDIO_REGISTER_MODE_MISSING`` when registering then you must provide some kind of page in your thread after reading from - the uffd. You must provide either UFFDIO_COPY or UFFDIO_ZEROPAGE. + the uffd. You must provide either ``UFFDIO_COPY`` or ``UFFDIO_ZEROPAGE``. The normal behavior of the OS automatically providing a zero page on an annonymous mmaping is not in place. 
@@ -122,13 +122,13 @@ Notes: - You get the address of the access that triggered the missing page event out of a struct uffd_msg that you read in the thread from the - uffd. You can supply as many pages as you want with UFFDIO_COPY or - UFFDIO_ZEROPAGE. Keep in mind that unless you used DONTWAKE then + uffd. You can supply as many pages as you want with ``UFFDIO_COPY`` or + ``UFFDIO_ZEROPAGE``. Keep in mind that unless you used DONTWAKE then the first of any of those IOCTLs wakes up the faulting thread. -- Be sure to test for all errors including (pollfd[0].revents & - POLLERR). This can happen, e.g. when ranges supplied were - incorrect. +- Be sure to test for all errors including + (``pollfd[0].revents & POLLERR``). This can happen, e.g. when ranges + supplied were incorrect. Write Protect Notifications --------------------------- @@ -136,41 +136,42 @@ Write Protect Notifications This is equivalent to (but faster than) using mprotect and a SIGSEGV signal handler. -Firstly you need to register a range with UFFDIO_REGISTER_MODE_WP. -Instead of using mprotect(2) you use ioctl(uffd, UFFDIO_WRITEPROTECT, -struct *uffdio_writeprotect) while mode = UFFDIO_WRITEPROTECT_MODE_WP +Firstly you need to register a range with ``UFFDIO_REGISTER_MODE_WP``. +Instead of using mprotect(2) you use +``ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect)`` +while ``mode = UFFDIO_WRITEPROTECT_MODE_WP`` in the struct passed in. The range does not default to and does not have to be identical to the range you registered with. You can write protect as many ranges as you like (inside the registered range). Then, in the thread reading from uffd the struct will have -msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP set. Now you send -ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect) again -while pagefault.mode does not have UFFDIO_WRITEPROTECT_MODE_WP set. -This wakes up the thread which will continue to run with writes. 
This +``msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP`` set. Now you send +``ioctl(uffd, UFFDIO_WRITEPROTECT, struct *uffdio_writeprotect)`` +again while ``pagefault.mode`` does not have ``UFFDIO_WRITEPROTECT_MODE_WP`` +set. This wakes up the thread which will continue to run with writes. This allows you to do the bookkeeping about the write in the uffd reading thread before the ioctl. -If you registered with both UFFDIO_REGISTER_MODE_MISSING and -UFFDIO_REGISTER_MODE_WP then you need to think about the sequence in +If you registered with both ``UFFDIO_REGISTER_MODE_MISSING`` and +``UFFDIO_REGISTER_MODE_WP`` then you need to think about the sequence in which you supply a page and undo write protect. Note that there is a difference between writes into a WP area and into a !WP area. The -former will have UFFD_PAGEFAULT_FLAG_WP set, the latter -UFFD_PAGEFAULT_FLAG_WRITE. The latter did not fail on protection but -you still need to supply a page when UFFDIO_REGISTER_MODE_MISSING was +former will have ``UFFD_PAGEFAULT_FLAG_WP`` set, the latter +``UFFD_PAGEFAULT_FLAG_WRITE``. The latter did not fail on protection but +you still need to supply a page when ``UFFDIO_REGISTER_MODE_MISSING`` was used. QEMU/KVM ======== -QEMU/KVM is using the userfaultfd syscall to implement postcopy live +QEMU/KVM is using the ``userfaultfd`` syscall to implement postcopy live migration. Postcopy live migration is one form of memory externalization consisting of a virtual machine running with part or all of its memory residing on a different node in the cloud. The -userfaultfd abstraction is generic enough that not a single line of +``userfaultfd`` abstraction is generic enough that not a single line of KVM kernel code had to be modified in order to add postcopy live migration to QEMU. -Guest async page faults, FOLL_NOWAIT and all other GUP features work +Guest async page faults, ``FOLL_NOWAIT`` and all other ``GUP*`` features work just fine in combination with userfaults. 
Userfaults trigger async page faults in the guest scheduler so those guest processes that aren't waiting for userfaults (i.e. network bound) can keep running in @@ -183,19 +184,19 @@ generating userfaults for readonly guest regions. The implementation of postcopy live migration currently uses one single bidirectional socket but in the future two different sockets will be used (to reduce the latency of the userfaults to the minimum -possible without having to decrease /proc/sys/net/ipv4/tcp_wmem). +possible without having to decrease ``/proc/sys/net/ipv4/tcp_wmem``). The QEMU in the source node writes all pages that it knows are missing in the destination node, into the socket, and the migration thread of -the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE -ioctls on the userfaultfd in order to map the received pages into the -guest (UFFDIO_ZEROCOPY is used if the source page was a zero page). +the QEMU running in the destination node runs ``UFFDIO_COPY|ZEROPAGE`` +ioctls on the ``userfaultfd`` in order to map the received pages into the +guest (``UFFDIO_ZEROCOPY`` is used if the source page was a zero page). A different postcopy thread in the destination node listens with -poll() to the userfaultfd in parallel. When a POLLIN event is +poll() to the ``userfaultfd`` in parallel. When a ``POLLIN`` event is generated after a userfault triggers, the postcopy thread read() from -the userfaultfd and receives the fault address (or -EAGAIN in case the -userfault was already resolved and waken by a UFFDIO_COPY|ZEROPAGE run +the ``userfaultfd`` and receives the fault address (or ``-EAGAIN`` in case the +userfault was already resolved and waken by a ``UFFDIO_COPY|ZEROPAGE`` run by the parallel QEMU migration thread). After the QEMU postcopy thread (running in the destination node) gets @@ -206,7 +207,7 @@ remaining missing pages from that new page offset. 
Soon after that (just the time to flush the tcp_wmem queue through the network) the migration thread in the QEMU running in the destination node will receive the page that triggered the userfault and it'll map it as -usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it +usual with the ``UFFDIO_COPY|ZEROPAGE`` (without actually knowing if it was spontaneously sent by the source or if it was an urgent page requested through a userfault). @@ -219,74 +220,74 @@ checked to find which missing pages to send in round robin and we seek over it when receiving incoming userfaults. After sending each page of course the bitmap is updated accordingly. It's also useful to avoid sending the same page twice (in case the userfault is read by the -postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration +postcopy thread just before ``UFFDIO_COPY|ZEROPAGE`` runs in the migration thread). Non-cooperative userfaultfd =========================== -When the userfaultfd is monitored by an external manager, the manager +When the ``userfaultfd`` is monitored by an external manager, the manager must be able to track changes in the process virtual memory layout. Userfaultfd can notify the manager about such changes using the same read(2) protocol as for the page fault notifications. The manager has to explicitly enable these events by setting appropriate -bits in uffdio_api.features passed to UFFDIO_API ioctl: +bits in ``uffdio_api.features`` passed to ``UFFDIO_API`` ioctl: -UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When this feature is - enabled, the userfaultfd context of the parent process is +``UFFD_FEATURE_EVENT_FORK`` + enable ``userfaultfd`` hooks for fork(). When this feature is + enabled, the ``userfaultfd`` context of the parent process is duplicated into the newly created process. The manager - receives UFFD_EVENT_FORK with file descriptor of the new - userfaultfd context in the uffd_msg.fork. 
+ receives ``UFFD_EVENT_FORK`` with file descriptor of the new + ``userfaultfd`` context in the ``uffd_msg.fork``. -UFFD_FEATURE_EVENT_REMAP +``UFFD_FEATURE_EVENT_REMAP`` enable notifications about mremap() calls. When the non-cooperative process moves a virtual memory area to a different location, the manager will receive - UFFD_EVENT_REMAP. The uffd_msg.remap will contain the old and + ``UFFD_EVENT_REMAP``. The ``uffd_msg.remap`` will contain the old and new addresses of the area and its original length. -UFFD_FEATURE_EVENT_REMOVE +``UFFD_FEATURE_EVENT_REMOVE`` enable notifications about madvise(MADV_REMOVE) and - madvise(MADV_DONTNEED) calls. The event UFFD_EVENT_REMOVE will - be generated upon these calls to madvise. The uffd_msg.remove + madvise(MADV_DONTNEED) calls. The event ``UFFD_EVENT_REMOVE`` will + be generated upon these calls to madvise(). The ``uffd_msg.remove`` will contain start and end addresses of the removed area. -UFFD_FEATURE_EVENT_UNMAP +``UFFD_FEATURE_EVENT_UNMAP`` enable notifications about memory unmapping. The manager will - get UFFD_EVENT_UNMAP with uffd_msg.remove containing start and + get ``UFFD_EVENT_UNMAP`` with ``uffd_msg.remove`` containing start and end addresses of the unmapped area. -Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP +Although the ``UFFD_FEATURE_EVENT_REMOVE`` and ``UFFD_FEATURE_EVENT_UNMAP`` are pretty similar, they quite differ in the action expected from the -userfaultfd manager. In the former case, the virtual memory is +``userfaultfd`` manager. In the former case, the virtual memory is removed, but the area is not, the area remains monitored by the -userfaultfd, and if a page fault occurs in that area it will be +``userfaultfd``, and if a page fault occurs in that area it will be delivered to the manager. The proper resolution for such page fault is to zeromap the faulting address. 
However, in the latter case, when an area is unmapped, either explicitly (with munmap() system call), or implicitly (e.g. during mremap()), the area is removed and in turn the -userfaultfd context for such area disappears too and the manager will +``userfaultfd`` context for such area disappears too and the manager will not get further userland page faults from the removed area. Still, the notification is required in order to prevent manager from using -UFFDIO_COPY on the unmapped area. +``UFFDIO_COPY`` on the unmapped area. Unlike userland page faults which have to be synchronous and require explicit or implicit wakeup, all the events are delivered asynchronously and the non-cooperative process resumes execution as -soon as manager executes read(). The userfaultfd manager should -carefully synchronize calls to UFFDIO_COPY with the events -processing. To aid the synchronization, the UFFDIO_COPY ioctl will -return -ENOSPC when the monitored process exits at the time of -UFFDIO_COPY, and -ENOENT, when the non-cooperative process has changed -its virtual memory layout simultaneously with outstanding UFFDIO_COPY +soon as manager executes read(). The ``userfaultfd`` manager should +carefully synchronize calls to ``UFFDIO_COPY`` with the events +processing. To aid the synchronization, the ``UFFDIO_COPY`` ioctl will +return ``-ENOSPC`` when the monitored process exits at the time of +``UFFDIO_COPY``, and ``-ENOENT``, when the non-cooperative process has changed +its virtual memory layout simultaneously with outstanding ``UFFDIO_COPY`` operation. The current asynchronous model of the event delivery is optimal for -single threaded non-cooperative userfaultfd manager implementations. A +single threaded non-cooperative ``userfaultfd`` manager implementations. 
A synchronous event delivery model can be added later as a new -userfaultfd feature to facilitate multithreading enhancements of the -non cooperative manager, for example to allow UFFDIO_COPY ioctls to +``userfaultfd`` feature to facilitate multithreading enhancements of the +non cooperative manager, for example to allow ``UFFDIO_COPY`` ioctls to run in parallel to the event reception. Single threaded implementations should continue to use the current async event delivery model instead. -- cgit v1.2.3 From 4a3fe6541c8cde8cb69e6453752b7fea1aac7d55 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:47 +0200 Subject: docs: mm: userfaultfd.rst: use a cross-reference for a section Instead of using "foo", let's use `foo`_, with is a ReST way of saying that foo is a section of the document. With that, after building the docs, an hyperlink is generated. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f46b45f1aaec233217f2e0b0438bbd8cc16fe17b.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/userfaultfd.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst index 740d111faf1c..0bf49d7313ad 100644 --- a/Documentation/admin-guide/mm/userfaultfd.rst +++ b/Documentation/admin-guide/mm/userfaultfd.rst @@ -79,7 +79,7 @@ or shared memory need to set the corresponding flag in If the userland desires to receive notifications for events other than page faults, it has to verify that ``uffdio_api.features`` has appropriate ``UFFD_FEATURE_EVENT_*`` bits set. These events are described in more -detail below in "Non-cooperative userfaultfd" section. +detail below in `Non-cooperative userfaultfd`_ section. 
Once the ``userfaultfd`` has been enabled the ``UFFDIO_REGISTER`` ioctl should be invoked (if present in the returned ``uffdio_api.ioctls`` bitmask) to -- cgit v1.2.3 From 9070492b10c36faba0d9c2cb13d3e0a5a0ffaf6a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:48 +0200 Subject: docs: vm: index.rst: add an orphan doc to the building system The new free_page_reporting.rst file is not listed at the index. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/146432ae6965a2bb62c929a6b62f9d4010986622.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/vm/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index e8d943b21cf9..611140ffef7e 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -31,6 +31,7 @@ descriptions of data structures and algorithms. active_mm balance cleancache + free_page_reporting frontswap highmem hmm -- cgit v1.2.3 From b4c6d8efdcdde455e0bd9b5df5c42b28c131ca6c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:49 +0200 Subject: docs: dt: qcom,dwc3.txt: fix cross-reference for a converted file The qcom-qusb2-phy.txt file was converted and renamed to yaml. Update cross-reference accordingly. 
Fixes: 8ce65d8d38df ("dt-bindings: phy: qcom,qusb2: Convert QUSB2 phy bindings to yaml") Reviewed-by: Stephen Boyd Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/a055c564f2a79aa748064329d938db8b3c8edd58.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/devicetree/bindings/usb/qcom,dwc3.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.txt b/Documentation/devicetree/bindings/usb/qcom,dwc3.txt index cb695aa3fba4..fbdd01756752 100644 --- a/Documentation/devicetree/bindings/usb/qcom,dwc3.txt +++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.txt @@ -52,8 +52,8 @@ A child node must exist to represent the core DWC3 IP block. The name of the node is not important. The content of the node is defined in dwc3.txt. Phy documentation is provided in the following places: -Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt - USB3 QMP PHY -Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt - USB2 QUSB2 PHY +Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt - USB3 QMP PHY +Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml - USB2 QUSB2 PHY Example device nodes: -- cgit v1.2.3 From 3f4a6c925a42adc15d3d50a333b179743e31df45 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:50 +0200 Subject: docs: dt: fix a broken reference for a file converted to json Changeset 32ced09d7903 ("dt-bindings: serial: Convert slave-device bindings to json-schema") moved a binding to json and updated the links. Yet, one link was not changed, due to a merge conflict. Update this one too. 
Fixes: 32ced09d7903 ("dt-bindings: serial: Convert slave-device bindings to json-schema") Reviewed-by: Geert Uytterhoeven Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9b1603e254d39c9607bfedefeedaafd2c44aeb19.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt index beca6466d59a..d2202791c1d4 100644 --- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt +++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt @@ -29,7 +29,7 @@ Required properties for compatible string qcom,wcn399x-bt: Optional properties for compatible string qcom,wcn399x-bt: - - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt + - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml - firmware-name: specify the name of nvm firmware to load - clocks: clock provided to the controller -- cgit v1.2.3 From 8f97986ccbd75ff7f4deab8e33fd33be1e9d43c4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:51 +0200 Subject: docs: powerpc: cxl.rst: mark two section titles as such The User API chapter contains two sub-chapters. Mark them as such. Signed-off-by: Mauro Carvalho Chehab Acked-by: Andrew Donnellan Link: https://lore.kernel.org/r/190d67397cd63e419de8d85b92e8018d48e8c345.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/powerpc/cxl.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/powerpc/cxl.rst b/Documentation/powerpc/cxl.rst index 920546d81326..d2d77057610e 100644 --- a/Documentation/powerpc/cxl.rst +++ b/Documentation/powerpc/cxl.rst @@ -133,6 +133,7 @@ User API ======== 1. 
AFU character devices +^^^^^^^^^^^^^^^^^^^^^^^^ For AFUs operating in AFU directed mode, two character device files will be created. /dev/cxl/afu0.0m will correspond to a @@ -395,6 +396,7 @@ read 2. Card character device (powerVM guest only) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In a powerVM guest, an extra character device is created for the card. The device is only used to write (flash) a new image on the -- cgit v1.2.3 From 36536a02e5542ea4909eea27fa7b173e81565312 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:52 +0200 Subject: docs: i2c: rename i2c.svg to i2c_bus.svg When generating the PDF output, the Documentation/i2c dir will generate an i2c.pdf. The same happens with i2c.svg: it will also produce a file with the same name, at the same dir. This causes errors when building the PDF output. So, rename the image to i2c_bus.svg. Signed-off-by: Mauro Carvalho Chehab Acked-by: Wolfram Sang Link: https://lore.kernel.org/r/ecf3d51909ce46b3e84a1df4b36f07d76989e5da.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/i2c/i2c.svg | 1341 ----------------------------------------- Documentation/i2c/i2c_bus.svg | 1341 +++++++++++++++++++++++++++++++++++++++++ Documentation/i2c/summary.rst | 2 +- 3 files changed, 1342 insertions(+), 1342 deletions(-) delete mode 100644 Documentation/i2c/i2c.svg create mode 100644 Documentation/i2c/i2c_bus.svg diff --git a/Documentation/i2c/i2c.svg b/Documentation/i2c/i2c.svg deleted file mode 100644 index 5979405ad1c3..000000000000 --- a/Documentation/i2c/i2c.svg +++ /dev/null @@ -1,1341 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - Luca Ceresoli - - - 2020 - - - - - - - - - - - - - - - I2CMaster - - - I2CSlave - - SCL - SDA - - I2CSlave - - I2CSlave - - - - - - - - - - - - VDD - - diff --git a/Documentation/i2c/i2c_bus.svg b/Documentation/i2c/i2c_bus.svg new file mode 100644 index 000000000000..3170de976373 --- /dev/null +++ b/Documentation/i2c/i2c_bus.svg @@ -0,0 +1,1341 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + Luca Ceresoli + + + 2020 + + + + + + + + + + + + + + + I2CMaster + + + I2CSlave + + SCL + SDA + + I2CSlave + + I2CSlave + + + + + + + + + + + + VDD + + diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index ce7230025b33..136c4e333be7 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -34,7 +34,7 @@ Terminology Using the terminology from the official documentation, the I2C bus connects one or more *master* chips and one or more *slave* chips. -.. kernel-figure:: i2c.svg +.. kernel-figure:: i2c_bus.svg :alt: Simple I2C bus with one master and 3 slaves Simple I2C bus -- cgit v1.2.3 From baeb2d5cb8ea84dfb932cbde0e2adcd527e51bb5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:53 +0200 Subject: docs: Makefile: place final pdf docs on a separate dir The Sphinx build system for PDF is too complex and generate lots of ancillary files, including one PDF file for each image. So, at the end, the main latex dir has 156 pdf files, instead of the 71 ones that would match each generated book. 
That's confusing and it makes harder to identify when something didn't work. So, instead, let's move the final PDF output(s) to a separate dir. This way, the latex/ dir will have the temporary and the final *.tex files, while the final pdf files that built ok will be under the pdf/ directory. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/832752cbc9678a6e8d3d634bc3356d655d44684f.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index cc786d11a028..db1fc35ded50 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -98,7 +98,11 @@ else # HAVE_PDFLATEX pdfdocs: latexdocs @$(srctree)/scripts/sphinx-pre-install --version-check - $(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit;) + $(foreach var,$(SPHINXDIRS), \ + $(MAKE) PDFLATEX="$(PDFLATEX)" LATEXOPTS="$(LATEXOPTS)" -C $(BUILDDIR)/$(var)/latex || exit; \ + mkdir -p $(BUILDDIR)/$(var)/pdf; \ + mv $(subst .tex,.pdf,$(wildcard $(BUILDDIR)/$(var)/latex/*.tex)) $(BUILDDIR)/$(var)/pdf/; \ + ) endif # HAVE_PDFLATEX -- cgit v1.2.3 From 77c34b2c18d4d88aed9ab3407407c492e0ce18b8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:54 +0200 Subject: docs: dt: rockchip,dwc3.txt: fix a pointer to a renamed file phy-rockchip-inno-usb2.txt was converted to yaml. Fix the corresponding reference. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/287bd271f5c542e9d12a132a6b6a17672c9fd67c.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/devicetree/bindings/usb/rockchip,dwc3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt b/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt index c8c4b00ecb94..94520493233b 100644 --- a/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt +++ b/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt @@ -16,7 +16,7 @@ A child node must exist to represent the core DWC3 IP block. The name of the node is not important. The content of the node is defined in dwc3.txt. Phy documentation is provided in the following places: -Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt - USB2.0 PHY +Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.yaml - USB2.0 PHY Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt - Type-C PHY Example device nodes: -- cgit v1.2.3 From a31a6997e6df4bd0d7782dbe945a59c321475669 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:55 +0200 Subject: ata: libata-core: fix a doc warning The docs toolchain doesn't recognise this pattern: @link->[hw_]sata_spd_limit As it can't really process it. So, instead, let's mark it with a literal block markup: ``link->[hw_]sata_spd_limit`` in order to get rid of the following warning: ./drivers/ata/libata-core.c:5974: WARNING: Unknown target name: "hw". 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9a21444df75c46095c4b1839d2061d19c9addcff.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index beca5f91bb4c..69361ec43db5 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5209,7 +5209,7 @@ void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp) * sata_link_init_spd - Initialize link->sata_spd_limit * @link: Link to configure sata_spd_limit for * - * Initialize @link->[hw_]sata_spd_limit to the currently + * Initialize ``link->[hw_]sata_spd_limit`` to the currently * configured value. * * LOCKING: -- cgit v1.2.3 From af690f459393017ce40bb9beef6a00e79511574d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:56 +0200 Subject: firewire: firewire-cdev.h: get rid of a docs warning This warning: ./include/uapi/linux/firewire-cdev.h:312: WARNING: Inline literal start-string without end-string. is because %FOO doesn't work if there's a parenthesis at the string (as a parenthesis may indicate a function). So, mark the literal block using the alternate ``FOO`` syntax. 
Acked-by: Stefan Richter Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9b2501a41eba27ccdd4603cac2353c0efba7a90a.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- include/uapi/linux/firewire-cdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h index 1acd2b179aef..7e5b5c10a49c 100644 --- a/include/uapi/linux/firewire-cdev.h +++ b/include/uapi/linux/firewire-cdev.h @@ -308,7 +308,7 @@ struct fw_cdev_event_iso_interrupt_mc { /** * struct fw_cdev_event_iso_resource - Iso resources were allocated or freed * @closure: See &fw_cdev_event_common; - * set by %FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE) ioctl + * set by``FW_CDEV_IOC_(DE)ALLOCATE_ISO_RESOURCE(_ONCE)`` ioctl * @type: %FW_CDEV_EVENT_ISO_RESOURCE_ALLOCATED or * %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED * @handle: Reference by which an allocated resource can be deallocated -- cgit v1.2.3 From 2b8e8b5599a1831e9a4cf9aa86476887ff425e57 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:57 +0200 Subject: fs: inode.c: get rid of docs warnings Using *foo makes the toolchain think that this is an emphasis, causing those warnings: ./fs/inode.c:1609: WARNING: Inline emphasis start-string without end-string. ./fs/inode.c:1609: WARNING: Inline emphasis start-string without end-string. ./fs/inode.c:1615: WARNING: Inline emphasis start-string without end-string. So, use, instead, ``*foo``, in order to mark it as a literal block. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e8da46a0e57f2af6d63a0c53665495075698e28a.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- fs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 93d9252a00ab..37226a9cfa4f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1606,14 +1606,14 @@ EXPORT_SYMBOL(iput); * @inode: inode owning the block number being requested * @block: pointer containing the block to find * - * Replaces the value in *block with the block number on the device holding + * Replaces the value in ``*block`` with the block number on the device holding * corresponding to the requested block number in the file. * That is, asked for block 4 of inode 1 the function will replace the - * 4 in *block, with disk block relative to the disk start that holds that + * 4 in ``*block``, with disk block relative to the disk start that holds that * block of the file. * * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a - * hole, returns 0 and *block is also set to 0. + * hole, returns 0 and ``*block`` is also set to 0. */ int bmap(struct inode *inode, sector_t *block) { -- cgit v1.2.3 From 03c109d66867767ddbb0fe09223410a79193f5ea Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:58 +0200 Subject: futex: get rid of a kernel-docs build warning Adjust whitespaces and blank lines in order to get rid of this: ./kernel/futex.c:491: WARNING: Definition list ends without a blank line; unexpected unindent. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/57788af7889161483e0c97f91c079cfb3986c4b3.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- kernel/futex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index b59532862bc0..b4b9f960b610 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -486,10 +486,13 @@ static u64 get_inode_sequence_number(struct inode *inode) * The key words are stored in @key on success. * * For shared mappings (when @fshared), the key is: + * * ( inode->i_sequence, page->index, offset_within_page ) + * * [ also see get_inode_sequence_number() ] * * For private mappings (or when !@fshared), the key is: + * * ( current->mm, address, 0 ) * * This allows (cross process, where applicable) identification of the futex -- cgit v1.2.3 From 4642289b5f6609b1af152a81b603ae8451df2626 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 14 Apr 2020 18:48:59 +0200 Subject: lib: bitmap.c: get rid of some doc warnings There are two ascii art drawings there. Use a block markup tag there in order to get rid of those warnings: ./lib/bitmap.c:189: WARNING: Unexpected indentation. ./lib/bitmap.c:190: WARNING: Block quote ends without a blank line; unexpected unindent. ./lib/bitmap.c:190: WARNING: Unexpected indentation. ./lib/bitmap.c:191: WARNING: Line block ends without a blank line. It should be noticed that there's actually a syntax violation right now, as something like: /** ... @src: will be handled as a definition for @src parameter, and not as part of a diagram. So, we need to add something before it, in order for this to be processed the way it should. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1e2568fdfa838c1a0d8cc2a1d70dd4b6de99bfb1.1586881715.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- lib/bitmap.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 89260aa342d6..21a7640c5eed 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -182,21 +182,22 @@ EXPORT_SYMBOL(__bitmap_shift_left); * * In pictures, example for a big-endian 32-bit architecture: * - * @src: - * 31 63 - * | | - * 10000000 11000001 11110010 00010101 10000000 11000001 01110010 00010101 - * | | | | - * 16 14 0 32 - * - * if @cut is 3, and @first is 14, bits 14-16 in @src are cut and @dst is: - * - * 31 63 - * | | - * 10110000 00011000 00110010 00010101 00010000 00011000 00101110 01000010 - * | | | - * 14 (bit 17 0 32 - * from @src) + * The @src bitmap is:: + * + * 31 63 + * | | + * 10000000 11000001 11110010 00010101 10000000 11000001 01110010 00010101 + * | | | | + * 16 14 0 32 + * + * if @cut is 3, and @first is 14, bits 14-16 in @src are cut and @dst is:: + * + * 31 63 + * | | + * 10110000 00011000 00110010 00010101 00010000 00011000 00101110 01000010 + * | | | + * 14 (bit 17 0 32 + * from @src) * * Note that @dst and @src might overlap partially or entirely. * -- cgit v1.2.3 From 5d8e5aee0e9391141fe2ab892d0ac98de5783537 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Sun, 15 Mar 2020 13:26:48 +0100 Subject: docs: sysctl/kernel: document BPF entries Based on the implementation in kernel/bpf/syscall.c, kernel/bpf/trampoline.c, include/linux/filter.h, and the documentation in bpftool-prog.rst. 
Signed-off-by: Stephen Kitt Link: https://lore.kernel.org/r/20200315122648.20558-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0d427fd10941..55b24eada13c 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -102,6 +102,20 @@ See the ``type_of_loader`` and ``ext_loader_ver`` fields in :doc:`/x86/boot` for additional information. +bpf_stats_enabled +================= + +Controls whether the kernel should collect statistics on BPF programs +(total time spent running, number of times run...). Enabling +statistics causes a slight reduction in performance on each program +run. The statistics can be seen using ``bpftool``. + += =================================== +0 Don't collect statistics (default). +1 Collect statistics. += =================================== + + cap_last_cap ============ @@ -1178,6 +1192,16 @@ NMI switch that most IA32 servers have fires unknown NMI up, for example. If a system hangs up, try pressing the NMI switch. +unprivileged_bpf_disabled +========================= + +Writing 1 to this entry will disable unprivileged calls to ``bpf()``; +once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return +``-EPERM``. + +Once set, this can't be cleared. + + watchdog ======== -- cgit v1.2.3 From c7e1cc318d4a19549d55a4f114f3a3d472e9531e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 20 Apr 2020 10:41:15 +0300 Subject: dma-buf: Couple of documentation typo fixes Fix a couple of typos: "as" -> "has" and "int" -> "in". 
Signed-off-by: Gal Pressman Link: https://lore.kernel.org/r/20200420074115.23931-1-galpress@amazon.com Signed-off-by: Jonathan Corbet --- Documentation/driver-api/dma-buf.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index c78db28519f7..63dec76d1d8d 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -11,7 +11,7 @@ course not limited to GPU use cases. The three main components of this are: (1) dma-buf, representing a sg_table and exposed to userspace as a file descriptor to allow passing between devices, (2) fence, which provides a mechanism to signal when -one device as finished access, and (3) reservation, which manages the +one device has finished access, and (3) reservation, which manages the shared or exclusive fence(s) associated with the buffer. Shared DMA Buffers @@ -31,7 +31,7 @@ The exporter - implements and manages operations in :c:type:`struct dma_buf_ops ` for the buffer, - allows other users to share the buffer by using dma_buf sharing APIs, - - manages the details of buffer allocation, wrapped int a :c:type:`struct + - manages the details of buffer allocation, wrapped in a :c:type:`struct dma_buf `, - decides about the actual backing storage where this allocation happens, - and takes care of any migration of scatterlist - for all (shared) users of -- cgit v1.2.3 From d8e8ff1fe30278e46afcb3029468e678bbcffddd Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 18 Apr 2020 20:41:32 +0300 Subject: docs: ioctl-number.rst: add habanalabs driver IOCTL Habanalabs driver in misc exposes several IOCTLs to userspace. Document the letter and IOCTLs number range in ioctl-number.rst. 
Signed-off-by: Oded Gabbay Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20200418174132.10597-1-oded.gabbay@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index f759edafd938..52bf58417653 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -146,6 +146,7 @@ Code Seq# Include File Comments 'H' 40-4F sound/hdspm.h conflict! 'H' 40-4F sound/hdsp.h conflict! 'H' 90 sound/usb/usx2y/usb_stream.h +'H' 00-0F uapi/misc/habanalabs.h conflict! 'H' A0 uapi/linux/usb/cdc-wdm.h 'H' C0-F0 net/bluetooth/hci.h conflict! 'H' C0-DF net/bluetooth/hidp/hidp.h conflict! -- cgit v1.2.3 From 7dbffd3f84b08c6c9f48f211dd7e5b41dade3092 Mon Sep 17 00:00:00 2001 From: Cristian Souza Date: Fri, 10 Apr 2020 22:02:01 -0300 Subject: docs: admin-guide: Clarify sentences Changes to make the text more formal and organized. The reasons are now cited and described at the same time. Minor grammatical problems have also been fixed. Signed-off-by: Cristian Souza Link: https://lore.kernel.org/r/20200411010201.GA22706@darkstar Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/init.rst | 76 ++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/Documentation/admin-guide/init.rst b/Documentation/admin-guide/init.rst index e89d97f31eaf..41f06a09152e 100644 --- a/Documentation/admin-guide/init.rst +++ b/Documentation/admin-guide/init.rst @@ -1,52 +1,48 @@ -Explaining the dreaded "No init found." boot hang message +Explaining the "No working init found." 
boot hang message ========================================================= +:Authors: Andreas Mohr + Cristian Souza -OK, so you've got this pretty unintuitive message (currently located -in init/main.c) and are wondering what the H*** went wrong. -Some high-level reasons for failure (listed roughly in order of execution) -to load the init binary are: - -A) Unable to mount root FS -B) init binary doesn't exist on rootfs -C) broken console device -D) binary exists but dependencies not available -E) binary cannot be loaded - -Detailed explanations: - -A) Set "debug" kernel parameter (in bootloader config file or CONFIG_CMDLINE) - to get more detailed kernel messages. -B) make sure you have the correct root FS type - (and ``root=`` kernel parameter points to the correct partition), - required drivers such as storage hardware (such as SCSI or USB!) - and filesystem (ext3, jffs2 etc.) are builtin (alternatively as modules, - to be pre-loaded by an initrd) -C) Possibly a conflict in ``console= setup`` --> initial console unavailable. - E.g. some serial consoles are unreliable due to serial IRQ issues (e.g. - missing interrupt-based configuration). +This document provides some high-level reasons for failure +(listed roughly in order of execution) to load the init binary. + +1) **Unable to mount root FS**: Set "debug" kernel parameter (in bootloader + config file or CONFIG_CMDLINE) to get more detailed kernel messages. + +2) **init binary doesn't exist on rootfs**: Make sure you have the correct + root FS type (and ``root=`` kernel parameter points to the correct + partition), required drivers such as storage hardware (such as SCSI or + USB!) and filesystem (ext3, jffs2, etc.) are builtin (alternatively as + modules, to be pre-loaded by an initrd). + +3) **Broken console device**: Possibly a conflict in ``console= setup`` + --> initial console unavailable. E.g. some serial consoles are unreliable + due to serial IRQ issues (e.g. missing interrupt-based configuration). 
Try using a different ``console= device`` or e.g. ``netconsole=``. -D) e.g. required library dependencies of the init binary such as - ``/lib/ld-linux.so.2`` missing or broken. Use - ``readelf -d |grep NEEDED`` to find out which libraries are required. -E) make sure the binary's architecture matches your hardware. - E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM hardware. - In case you tried loading a non-binary file here (shell script?), - you should make sure that the script specifies an interpreter in its shebang - header line (``#!/...``) that is fully working (including its library - dependencies). And before tackling scripts, better first test a simple - non-script binary such as ``/bin/sh`` and confirm its successful execution. - To find out more, add code ``to init/main.c`` to display kernel_execve()s - return values. + +4) **Binary exists but dependencies not available**: E.g. required library + dependencies of the init binary such as ``/lib/ld-linux.so.2`` missing or + broken. Use ``readelf -d |grep NEEDED`` to find out which libraries + are required. + +5) **Binary cannot be loaded**: Make sure the binary's architecture matches + your hardware. E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM + hardware. In case you tried loading a non-binary file here (shell script?), + you should make sure that the script specifies an interpreter in its + shebang header line (``#!/...``) that is fully working (including its + library dependencies). And before tackling scripts, better first test a + simple non-script binary such as ``/bin/sh`` and confirm its successful + execution. To find out more, add code ``to init/main.c`` to display + kernel_execve()s return values. Please extend this explanation whenever you find new failure causes (after all loading the init binary is a CRITICAL and hard transition step -which needs to be made as painless as possible), then submit patch to LKML. 
+which needs to be made as painless as possible), then submit a patch to LKML. Further TODOs: - Implement the various ``run_init_process()`` invocations via a struct array which can then store the ``kernel_execve()`` result value and on failure log it all by iterating over **all** results (very important usability fix). -- try to make the implementation itself more helpful in general, - e.g. by providing additional error messages at affected places. +- Try to make the implementation itself more helpful in general, e.g. by + providing additional error messages at affected places. -Andreas Mohr -- cgit v1.2.3 From fc965497d5b3204d7b726c27fefe92d2e885f994 Mon Sep 17 00:00:00 2001 From: Alessia Mantegazza Date: Mon, 13 Apr 2020 18:34:57 +0200 Subject: doc:it_IT: translation of documents in process/ Translations for the following documents in process/: - email-clients - management-style Signed-off-by: Alessia Mantegazza Signed-off-by: Federico Vaga Link: https://lore.kernel.org/r/20200413163457.52669-1-federico.vaga@vaga.pv.it Signed-off-by: Jonathan Corbet --- .../translations/it_IT/process/email-clients.rst | 332 ++++++++++++++++++++- .../it_IT/process/management-style.rst | 293 +++++++++++++++++- 2 files changed, 614 insertions(+), 11 deletions(-) diff --git a/Documentation/translations/it_IT/process/email-clients.rst b/Documentation/translations/it_IT/process/email-clients.rst index 224ab031ffd3..89abf6d325f2 100644 --- a/Documentation/translations/it_IT/process/email-clients.rst +++ b/Documentation/translations/it_IT/process/email-clients.rst @@ -1,12 +1,334 @@ .. include:: ../disclaimer-ita.rst -:Original: :ref:`Documentation/process/email-clients.rst ` - -.. _it_email_clients: +:Original: :doc:`../../../process/email-clients` +:Translator: Alessia Mantegazza Informazioni sui programmi di posta elettronica per Linux ========================================================= -.. 
warning:: +Git +--- + +Oggigiorno, la maggior parte degli sviluppatori utilizza ``git send-email`` +al posto dei classici programmi di posta elettronica. Le pagine man sono +abbastanza buone. Dal lato del ricevente, i manutentori utilizzano ``git am`` +per applicare le patch. + +Se siete dei novelli utilizzatori di ``git`` allora inviate la patch a voi +stessi. Salvatela come testo includendo tutte le intestazioni. Poi eseguite +il comando ``git am messaggio-formato-testo.txt`` e revisionatene il risultato +con ``git log``. Quando tutto funziona correttamente, allora potete inviare +la patch alla lista di discussione più appropriata. + +Panoramica delle opzioni +------------------------ + +Le patch per il kernel vengono inviate per posta elettronica, preferibilmente +come testo integrante del messaggio. Alcuni manutentori accettano gli +allegati, ma in questo caso gli allegati devono avere il *content-type* +impostato come ``text/plain``. Tuttavia, generalmente gli allegati non sono +ben apprezzati perché rende più difficile citare porzioni di patch durante il +processo di revisione. + +I programmi di posta elettronica che vengono usati per inviare le patch per il +kernel Linux dovrebbero inviarle senza alterazioni. Per esempio, non +dovrebbero modificare o rimuovere tabulazioni o spazi, nemmeno all'inizio o +alla fine delle righe. + +Non inviate patch con ``format=flowed``. Questo potrebbe introdurre +interruzioni di riga inaspettate e indesiderate. + +Non lasciate che il vostro programma di posta vada a capo automaticamente. +Questo può corrompere le patch. + +I programmi di posta non dovrebbero modificare la codifica dei caratteri nel +testo. Le patch inviate per posta elettronica dovrebbero essere codificate in +ASCII o UTF-8. +Se configurate il vostro programma per inviare messaggi codificati con UTF-8 +eviterete possibili problemi di codifica. 
+ +I programmi di posta dovrebbero generare e mantenere le intestazioni +"References" o "In-Reply-To:" cosicché la discussione non venga interrotta. + +Di solito, il copia-e-incolla (o taglia-e-incolla) non funziona con le patch +perché le tabulazioni vengono convertite in spazi. Usando xclipboard, xclip +e/o xcutsel potrebbe funzionare, ma è meglio che lo verifichiate o meglio +ancora: non usate il copia-e-incolla. + +Non usate firme PGP/GPG nei messaggi che contengono delle patch. Questo +impedisce il corretto funzionamento di alcuni script per leggere o applicare +patch (questo si dovrebbe poter correggere). + +Prima di inviare le patch sulle liste di discussione Linux, può essere una +buona idea quella di inviare la patch a voi stessi, salvare il messaggio +ricevuto, e applicarlo ai sorgenti con successo. + + +Alcuni suggerimenti per i programmi di posta elettronica (MUA) +-------------------------------------------------------------- + +Qui troverete alcuni suggerimenti per configurare i vostri MUA allo scopo +di modificare ed inviare patch per il kernel Linux. Tuttavia, questi +suggerimenti non sono da considerarsi come un riassunto di una configurazione +completa. + +Legenda: + +- TUI = interfaccia utente testuale (*text-based user interface*) +- GUI = interfaccia utente grafica (*graphical user interface*) + +Alpine (TUI) +************ + +Opzioni per la configurazione: + +Nella sezione :menuselection:`Sending Preferences`: + +- :menuselection:`Do Not Send Flowed Text` deve essere ``enabled`` +- :menuselection:`Strip Whitespace Before Sending` deve essere ``disabled`` + +Quando state scrivendo un messaggio, il cursore dev'essere posizionato +dove volete che la patch inizi, poi premendo :kbd:`CTRL-R` vi verrà chiesto +di selezionare il file patch da inserire nel messaggio. + +Claws Mail (GUI) +**************** + +Funziona. Alcune persone riescono ad usarlo con successo per inviare le patch. 
+ +Per inserire una patch usate :menuselection:`Messaggio-->Inserisci file` +(:kbd:`CTRL-I`) oppure un editor esterno. + +Se la patch che avete inserito dev'essere modificata usato la finestra di +scrittura di Claws, allora assicuratevi che l'"auto-interruzione" sia +disabilitata :menuselection:`Configurazione-->Preferenze-->Composizione-->Interruzione riga`. + +Evolution (GUI) +*************** + +Alcune persone riescono ad usarlo con successo per inviare le patch. + +Quando state scrivendo una lettera selezionate: Preformattato + da :menuselection:`Formato-->Stile del paragrafo-->Preformattato` + (:kbd:`CTRL-7`) o dalla barra degli strumenti + +Poi per inserire la patch usate: +:menuselection:`Inserisci--> File di testo...` (:kbd:`ALT-N x`) + +Potete anche eseguire ``diff -Nru old.c new.c | xclip``, selezionare +:menuselection:`Preformattato`, e poi usare il tasto centrale del mouse. + +Kmail (GUI) +*********** + +Alcune persone riescono ad usarlo con successo per inviare le patch. + +La configurazione base che disabilita la composizione di messaggi HTML è +corretta; non abilitatela. + +Quando state scrivendo un messaggio, nel menu opzioni, togliete la selezione a +"A capo automatico". L'unico svantaggio sarà che qualsiasi altra cosa scriviate +nel messaggio non verrà mandata a capo in automatico ma dovrete farlo voi. +Il modo più semplice per ovviare a questo problema è quello di scrivere il +messaggio con l'opzione abilitata e poi di salvarlo nelle bozze. Riaprendo ora +il messaggio dalle bozze le andate a capo saranno parte integrante del +messaggio, per cui togliendo l'opzione "A capo automatico" non perderete nulla. + +Alla fine del vostro messaggio, appena prima di inserire la vostra patch, +aggiungete il delimitatore di patch: tre trattini (``---``). + +Ora, dal menu :menuselection:`Messaggio`, selezionate :menuselection:`Inserisci file di testo...` +quindi scegliete la vostra patch. 
+Come soluzione aggiuntiva potreste personalizzare la vostra barra degli +strumenti aggiungendo un'icona per :menuselection:`Inserisci file di testo...`. + +Allargate la finestra di scrittura abbastanza da evitare andate a capo. +Questo perché in Kmail 1.13.5 (KDE 4.5.4), Kmail aggiunge andate a capo +automaticamente al momento dell'invio per tutte quelle righe che graficamente, +nella vostra finestra di composizione, si sono estete su una riga successiva. +Disabilitare l'andata a capo automatica non è sufficiente. Dunque, se la vostra +patch contiene delle righe molto lunghe, allora dovrete allargare la finestra +di composizione per evitare che quelle righe vadano a capo. Vedere: +https://bugs.kde.org/show_bug.cgi?id=174034 + +Potete firmare gli allegati con GPG, ma per le patch si preferisce aggiungerle +al testo del messaggio per cui non usate la firma GPG. Firmare le patch +inserite come testo del messaggio le rende più difficili da estrarre dalla loro +codifica a 7-bit. + +Se dovete assolutamente inviare delle patch come allegati invece di integrarle +nel testo del messaggio, allora premete il tasto destro sull'allegato e +selezionate :menuselection:`Proprietà`, e poi attivate +:menuselection:`Suggerisci visualizzazione automatica` per far si che +l'allegato sia più leggibile venendo visualizzato come parte del messaggio. + +Per salvare le patch inviate come parte di un messaggio, selezionate il +messaggio che la contiene, premete il tasto destro e selezionate +:menuselection:`Salva come`. Se il messaggio fu ben preparato, allora potrete +usarlo interamente senza alcuna modifica. +I messaggi vengono salvati con permessi di lettura-scrittura solo per l'utente, +nel caso in cui vogliate copiarli altrove per renderli disponibili ad altri +gruppi o al mondo, ricordatevi di usare ``chmod`` per cambiare i permessi. + +Lotus Notes (GUI) +***************** + +Scappate finché potete. + +IBM Verse (Web GUI) +******************* + +Vedi il commento per Lotus Notes. 
+ +Mutt (TUI) +********** + +Un sacco di sviluppatori Linux usano ``mutt``, per cui deve funzionare +abbastanza bene. + +Mutt non ha un proprio editor, quindi qualunque sia il vostro editor dovrete +configurarlo per non aggiungere automaticamente le andate a capo. Molti +editor hanno un'opzione :menuselection:`Inserisci file` che inserisce il +contenuto di un file senza alterarlo. + +Per usare ``vim`` come editor per mutt:: + + set editor="vi" + +Se per inserire la patch nel messaggio usate xclip, scrivete il comando:: + + :set paste + +prima di premere il tasto centrale o shift-insert. Oppure usate il +comando:: + + :r filename + +(a)llega funziona bene senza ``set paste`` + +Potete generare le patch con ``git format-patch`` e usare Mutt per inviarle:: + + $ mutt -H 0001-some-bug-fix.patch + +Opzioni per la configurazione: + +Tutto dovrebbe funzionare già nella configurazione base. +Tuttavia, è una buona idea quella di impostare ``send_charset``:: + + set send_charset="us-ascii:utf-8" + +Mutt è molto personalizzabile. 
Qui di seguito trovate la configurazione minima +per iniziare ad usare Mutt per inviare patch usando Gmail:: + + # .muttrc + # ================ IMAP ==================== + set imap_user = 'yourusername@gmail.com' + set imap_pass = 'yourpassword' + set spoolfile = imaps://imap.gmail.com/INBOX + set folder = imaps://imap.gmail.com/ + set record="imaps://imap.gmail.com/[Gmail]/Sent Mail" + set postponed="imaps://imap.gmail.com/[Gmail]/Drafts" + set mbox="imaps://imap.gmail.com/[Gmail]/All Mail" + + # ================ SMTP ==================== + set smtp_url = "smtp://username@smtp.gmail.com:587/" + set smtp_pass = $imap_pass + set ssl_force_tls = yes # Require encrypted connection + + # ================ Composition ==================== + set editor = `echo \$EDITOR` + set edit_headers = yes # See the headers when editing + set charset = UTF-8 # value of $LANG; also fallback for send_charset + # Sender, email address, and sign-off line must match + unset use_domain # because joe@localhost is just embarrassing + set realname = "YOUR NAME" + set from = "username@gmail.com" + set use_from = yes + +La documentazione di Mutt contiene molte più informazioni: + + https://gitlab.com/muttmua/mutt/-/wikis/UseCases/Gmail + + http://www.mutt.org/doc/manual/ + +Pine (TUI) +********** + +Pine aveva alcuni problemi con gli spazi vuoti, ma questi dovrebbero essere +stati risolti. + +Se potete usate alpine (il successore di pine). 
+ +Opzioni di configurazione: + +- Nelle versioni più recenti è necessario avere ``quell-flowed-text`` +- l'opzione ``no-strip-whitespace-before-send`` è necessaria + +Sylpheed (GUI) +************** + +- funziona bene per aggiungere testo in linea (o usando allegati) +- permette di utilizzare editor esterni +- è lento su cartelle grandi +- non farà l'autenticazione TLS SMTP su una connessione non SSL +- ha un utile righello nella finestra di scrittura +- la rubrica non comprende correttamente il nome da visualizzare e + l'indirizzo associato + +Thunderbird (GUI) +***************** + +Thunderbird è un clone di Outlook a cui piace maciullare il testo, ma esistono +modi per impedirglielo. + +- permettere l'uso di editor esterni: + La cosa più semplice da fare con Thunderbird e le patch è quello di usare + l'estensione "external editor" e di usare il vostro ``$EDITOR`` preferito per + leggere/includere patch nel vostro messaggio. Per farlo, scaricate ed + installate l'estensione e aggiungete un bottone per chiamarla rapidamente + usando :menuselection:`Visualizza-->Barra degli strumenti-->Personalizza...`; + una volta fatto potrete richiamarlo premendo sul bottone mentre siete nella + finestra :menuselection:`Scrivi` + + Tenete presente che "external editor" richiede che il vostro editor non + faccia alcun fork, in altre parole, l'editor non deve ritornare prima di + essere stato chiuso. Potreste dover passare dei parametri aggiuntivi al + vostro editor oppure cambiargli la configurazione. Per esempio, usando + gvim dovrete aggiungere l'opzione -f ``/usr/bin/gvim -f`` (Se il binario + si trova in ``/usr/bin``) nell'apposito campo nell'interfaccia di + configurazione di :menuselection:`external editor`. Se usate altri editor + consultate il loro manuale per sapere come configurarli. + +Per rendere l'editor interno un po' più sensato, fate così: + +- Modificate le impostazioni di Thunderbird per far si che non usi + ``format=flowed``. 
Andate in :menuselection:`Modifica-->Preferenze-->Avanzate-->Editor di configurazione` + per invocare il registro delle impostazioni. + +- impostate ``mailnews.send_plaintext_flowed`` a ``false`` + +- impostate ``mailnews.wraplength`` da ``72`` a ``0`` + +- :menuselection:`Visualizza-->Corpo del messaggio come-->Testo semplice` + +- :menuselection:`Visualizza-->Codifica del testo-->Unicode` + + +TkRat (GUI) +*********** + +Funziona. Usare "Inserisci file..." o un editor esterno. + +Gmail (Web GUI) +*************** + +Non funziona per inviare le patch. + +Il programma web Gmail converte automaticamente i tab in spazi. + +Allo stesso tempo aggiunge andata a capo ogni 78 caratteri. Comunque +il problema della conversione fra spazi e tab può essere risolto usando +un editor esterno. - TODO ancora da tradurre +Un altro problema è che Gmail usa la codifica base64 per tutti quei messaggi +che contengono caratteri non ASCII. Questo include cose tipo i nomi europei. diff --git a/Documentation/translations/it_IT/process/management-style.rst b/Documentation/translations/it_IT/process/management-style.rst index 07e68bfb8402..c709285138a7 100644 --- a/Documentation/translations/it_IT/process/management-style.rst +++ b/Documentation/translations/it_IT/process/management-style.rst @@ -1,12 +1,293 @@ .. include:: ../disclaimer-ita.rst -:Original: :ref:`Documentation/process/management-style.rst ` +:Original: :doc:`../../../process/management-style` +:Translator: Alessia Mantegazza -.. _it_managementstyle: +Il modello di gestione del kernel Linux +======================================= -Tipo di gestione del kernel Linux -================================= +Questo breve documento descrive il modello di gestione del kernel Linux. +Per certi versi, esso rispecchia il documento +:ref:`translations/it_IT/process/coding-style.rst `, +ed è principalmente scritto per evitare di rispondere [#f1]_ in continuazione +alle stesse identiche (o quasi) domande. -.. 
warning:: +Il modello di gestione è qualcosa di molto personale e molto più difficile da +qualificare rispetto a delle semplici regole di codifica, quindi questo +documento potrebbe avere più o meno a che fare con la realtà. È cominciato +come un gioco, ma ciò non significa che non possa essere vero. +Lo dovrete decidere voi stessi. - TODO ancora da tradurre +In ogni caso, quando si parla del "dirigente del kernel", ci si riferisce +sempre alla persona che dirige tecnicamente, e non a coloro che +tradizionalmente hanno un ruolo direttivo all'interno delle aziende. Se vi +occupate di convalidare acquisti o avete una qualche idea sul budget del vostro +gruppo, probabilmente non siete un dirigente del kernel. Quindi i suggerimenti +qui indicati potrebbero fare al caso vostro, oppure no. + +Prima di tutto, suggerirei di acquistare "Le sette regole per avere successo", +e di non leggerlo. Bruciatelo, è un grande gesto simbolico. + +.. [#f1] Questo documento non fa molto per risponde alla domanda, ma rende + così dannatamente ovvio a chi la pone che non abbiamo la minima idea + di come rispondere. + +Comunque, partiamo: + +.. _it_decisions: + +1) Le decisioni +--------------- + +Tutti pensano che i dirigenti decidano, e che questo prendere decisioni +sia importante. Più grande e dolorosa è la decisione, più importante deve +essere il dirigente che la prende. Questo è molto profondo ed ovvio, ma non è +del tutto vero. + +Il gioco consiste nell'"evitare" di dover prendere decisioni. In particolare +se qualcuno vi chiede di "Decidere" tra (a) o (b), e vi dice che ha +davvero bisogno di voi per questo, come dirigenti siete nei guai. +Le persone che gestite devono conoscere i dettagli più di quanto li conosciate +voi, quindi se vengono da voi per una decisione tecnica, siete fottuti. +Non sarete chiaramente competente per prendere quella decisione per loro. 
+ +(Corollario: se le persone che gestite non conoscono i dettagli meglio di voi, +anche in questo caso sarete fregati, tuttavia per altre ragioni. Ossia state +facendo il lavoro sbagliato, e che invece dovrebbero essere "loro" a gestirvi) + +Quindi il gioco si chiama "evitare" decisioni, almeno le più grandi e +difficili. Prendere decisioni piccoli e senza conseguenze va bene, e vi fa +sembrare competenti in quello che state facendo, quindi quello che un dirigente +del kernel ha bisogno di fare è trasformare le decisioni grandi e difficili +in minuzie delle quali nessuno importa. + +Ciò aiuta a capire che la differenza chiave tra una grande decisione ed una +piccola sta nella possibilità di modificare tale decisione in seguito. +Qualsiasi decisione importante può essere ridotta in decisioni meno importanti, +ma dovete assicurarvi che possano essere reversibili in caso di errori +(presenti o futuri). Improvvisamente, dovrete essere doppiamente dirigenti +per **due** decisioni non sequenziali - quella sbagliata **e** quella giusta. + +E le persone vedranno tutto ciò come prova di vera capacità di comando +(*cough* cavolata *cough*) + +Così la chiave per evitare le decisioni difficili diviene l'evitare +di fare cose che non possono essere disfatte. Non infilatevi in un angolo +dal quale non potrete sfuggire. Un topo messo all'angolo può rivelarsi +pericoloso - un dirigente messo all'angolo è solo pietoso. + +**In ogni caso** dato che nessuno è stupido al punto da lasciare veramente ad +un dirigente del kernel un enorme responsabilità, solitamente è facile fare +marcia indietro. Annullare una decisione è molto facile: semplicemente dite a +tutti che siete stati degli scemi incompetenti, dite che siete dispiaciuti, ed +annullate tutto l'inutile lavoro sul quale gli altri hanno lavorato nell'ultimo +anno. Improvvisamente la decisione che avevate preso un anno fa non era poi +così grossa, dato che può essere facilmente annullata. 
+ +È emerso che alcune persone hanno dei problemi con questo tipo di approccio, +questo per due ragioni: + + - ammettere di essere degli idioti è più difficile di quanto sembri. A tutti + noi piace mantenere le apparenze, ed uscire allo scoperto in pubblico per + ammettere che ci si è sbagliati è qualcosa di davvero impegnativo. + - avere qualcuno che ti dice che ciò su cui hai lavorato nell'ultimo anno + non era del tutto valido, può rivelarsi difficile anche per un povero ed + umile ingegnere, e mentre il **lavoro** vero era abbastanza facile da + cancellare, dall'altro canto potreste aver irrimediabilmente perso la + fiducia di quell'ingegnere. E ricordate che l'"irrevocabile" era quello + che avevamo cercato di evitare fin dall'inizio, e la vostra decisione + ha finito per esserlo. + +Fortunatamente, entrambe queste ragioni possono essere mitigate semplicemente +ammettendo fin dal principio che non avete una cavolo di idea, dicendo +agli altri in anticipo che la vostra decisione è puramente ipotetica, e che +potrebbe essere sbagliata. Dovreste sempre riservarvi il diritto di cambiare +la vostra opinione, e rendere gli altri ben **consapevoli** di ciò. +Ed è molto più facile ammettere di essere stupidi quando non avete **ancora** +fatto quella cosa stupida. + +Poi, quando è realmente emersa la vostra stupidità, le persone semplicemente +roteeranno gli occhi e diranno "Uffa, no, ancora". + +Questa ammissione preventiva di incompetenza potrebbe anche portare le persone +che stanno facendo il vero lavoro, a pensarci due volte. Dopo tutto, se +**loro** non sono certi se sia una buona idea, voi, sicuro come la morte, +non dovreste incoraggiarli promettendogli che ciò su cui stanno lavorando +verrà incluso. Fate si che ci pensino due volte prima che si imbarchino in un +grosso lavoro. + +Ricordate: loro devono sapere più cose sui dettagli rispetto a voi, e +solitamente pensano di avere già la risposta a tutto. 
La miglior cosa che +potete fare in qualità di dirigente è di non instillare troppa fiducia, ma +invece fornire una salutare dose di pensiero critico su quanto stanno facendo. + +Comunque, un altro modo di evitare una decisione è quello di lamentarsi +malinconicamente dicendo : "non possiamo farli entrambi e basta?" e con uno +sguardo pietoso. Fidatevi, funziona. Se non è chiaro quale sia il miglior +approccio, lo scopriranno. La risposta potrebbe essere data dal fatto che +entrambe i gruppi di lavoro diventano frustati al punto di rinunciarvi. + +Questo può suonare come un fallimento, ma di solito questo è un segno che +c'era qualcosa che non andava in entrambe i progetti, e il motivo per +il quale le persone coinvolte non abbiano potuto decidere era che entrambe +sbagliavano. Voi ne uscirete freschi come una rosa, e avrete evitato un'altra +decisione con la quale avreste potuto fregarvi. + + +2) Le persone +------------- + +Ci sono molte persone stupide, ed essere un dirigente significa che dovrete +scendere a patti con questo, e molto più importate, che **loro** devono avere +a che fare con **voi**. + +Ne emerge che mentre è facile annullare degli errori tecnici, non è invece +così facile rimuovere i disordini della personalità. Dovrete semplicemente +convivere con i loro, ed i vostri, problemi. + +Comunque, al fine di preparavi in qualità di dirigenti del kernel, è meglio +ricordare di non abbattere alcun ponte, bombardare alcun paesano innocente, +o escludere troppi sviluppatori kernel. Ne emerge che escludere le persone +è piuttosto facile, mentre includerle nuovamente è difficile. Così +"l'esclusione" immediatamente cade sotto il titolo di "non reversibile", e +diviene un no-no secondo la sezione :ref:`it_decisions`. 
+ +Esistono alcune semplici regole qui: + + (1) non chiamate le persone teste di c*** (al meno, non in pubblico) + (2) imparate a scusarvi quando dimenticate la regola (1) + +Il problema del punto numero 1 è che è molto facile da rispettare, dato che +è possibile dire "sei una testa di c***" in milioni di modi differenti [#f2]_, +a volte senza nemmeno pensarci, e praticamente sempre con la calda convinzione +di essere nel giusto. + +E più convinti sarete che avete ragione (e diciamolo, potete chiamare +praticamente **tutti** testa di c**, e spesso **sarete** nel giusto), più +difficile sarà scusarvi successivamente. + +Per risolvere questo problema, avete due possibilità: + + - diventare davvero bravi nello scusarsi + - essere amabili così che nessuno finirà col sentirsi preso di mira. Siate + creativi abbastanza, e potrebbero esserne divertiti. + +L'opzione dell'essere immancabilmente educati non esiste proprio. Nessuno +si fiderà di qualcuno che chiaramente sta nascondendo il suo vero carattere. + +.. [#f2] Paul Simon cantava: "50 modi per lasciare il vostro amante", perché, + molto francamente, "Un milione di modi per dire ad uno sviluppatore + Testa di c***" non avrebbe funzionato. Ma sono sicuro che ci abbia + pensato. + + +3) Le persone II - quelle buone +------------------------------- + +Mentre emerge che la maggior parte delle persone sono stupide, il corollario +a questo è il triste fatto che anche voi siete fra queste, e che mentre +possiamo tutti crogiolarci nella sicurezza di essere migliori della media +delle persone (diciamocelo, nessuno crede di essere nelle media o sotto di +essa), dovremmo anche ammettere che non siamo il "coltello più affilato" del +circondario, e che ci saranno altre persone che sono meno stupide di quanto +lo siete voi. + +Molti reagiscono male davanti alle persone intelligenti. Altri le usano a +proprio vantaggio. + +Assicuratevi che voi, in qualità di manutentori del kernel, siate nel secondo +gruppo. 
Inchinatevi dinanzi a loro perché saranno le persone che vi renderanno +il lavoro più facile. In particolare, prenderanno le decisioni per voi, che è +l'oggetto di questo gioco. + +Quindi quando trovate qualcuno più sveglio di voi, prendetevela comoda. +Le vostre responsabilità dirigenziali si ridurranno in gran parte nel dire +"Sembra una buona idea - Vai", oppure "Sembra buono, ma invece circa questo e +quello?". La seconda versione in particolare è una gran modo per imparare +qualcosa di nuovo circa "questo e quello" o di sembrare **extra** dirigenziali +sottolineando qualcosa alla quale i più svegli non avevano pensato. In +entrambe i casi, vincete. + +Una cosa alla quale dovete fare attenzione è che l'essere grandi in qualcosa +non si traduce automaticamente nell'essere grandi anche in altre cose. Quindi +dovreste dare una spintarella alle persone in una specifica direzione, ma +diciamocelo, potrebbero essere bravi in ciò che fanno e far schifo in tutto +il resto. La buona notizia è che le persone tendono a gravitare attorno a ciò +in cui sono bravi, quindi non state facendo nulla di irreversibile quando li +spingete verso una certa direzione, solo non spingete troppo. + + +4) Addossare le colpe +--------------------- + +Le cose andranno male, e le persone vogliono qualcuno da incolpare. Sarete voi. + +Non è poi così difficile accettare la colpa, specialmente se le persone +riescono a capire che non era **tutta** colpa vostra. Il che ci porta +sulla miglior strada per assumersi la colpa: fatelo per qualcun'altro. +Vi sentirete bene nel assumervi la responsabilità, e loro si sentiranno +bene nel non essere incolpati, e coloro che hanno perso i loro 36GB di +pornografia a causa della vostra incompetenza ammetteranno a malincuore che +almeno non avete cercato di fare il furbetto. + +Successivamente fate in modo che gli sviluppatori che in realtà hanno fallito +(se riuscite a trovarli) sappiano **in privato** che sono "fottuti". 
+Questo non per fargli sapere che la prossima volta possono evitarselo ma per +fargli capire che sono in debito. E, forse cosa più importante, sono loro che +devono sistemare la cosa. Perché, ammettiamolo, è sicuro non sarete voi a +farlo. + +Assumersi la colpa è anche ciò che vi rendere dirigenti in prima battuta. +È parte di ciò che spinge gli altri a fidarsi di voi, e vi garantisce +la gloria potenziale, perché siete gli unici a dire "Ho fatto una cavolata". +E se avete seguito le regole precedenti, sarete decisamente bravi nel dirlo. + + +5) Le cose da evitare +--------------------- + +Esiste una cosa che le persone odiano più che essere chiamate "teste di c****", +ed è essere chiamate "teste di c****" con fare da bigotto. Se per il primo +caso potrete comunque scusarvi, per il secondo non ve ne verrà data nemmeno +l'opportunità. Probabilmente smetteranno di ascoltarvi anche se tutto sommato +state svolgendo un buon lavoro. + +Tutti crediamo di essere migliori degli altri, il che significa che quando +qualcuno inizia a darsi delle arie, ci da **davvero** fastidio. Potreste anche +essere moralmente ed intellettualmente superiore a tutti quelli attorno a voi, +ma non cercate di renderlo ovvio per gli altri a meno che non **vogliate** +veramente far arrabbiare qualcuno [#f3]_. + +Allo stesso modo evitate di essere troppo gentili e pacati. Le buone maniere +facilmente finiscono per strabordare e nascondere i problemi, e come si usa +dire, "su internet nessuno può sentire la vostra pacatezza". Usate argomenti +diretti per farvi capire, non potete sperare che la gente capisca in altro +modo. + +Un po' di umorismo può aiutare a smorzare sia la franchezza che la moralità. +Andare oltre i limiti al punto d'essere ridicolo può portare dei punti a casa +senza renderlo spiacevole per i riceventi, i quali penseranno che stavate +facendo gli scemi. Può anche aiutare a lasciare andare quei blocchi mentali +che abbiamo nei confronti delle critiche. + +.. 
[#f3] Suggerimento: i forum di discussione su internet, che non sono + collegati col vostro lavoro, sono ottimi modi per sfogare la frustrazione + verso altre persone. Di tanto in tanto scrivete messaggi offensivi col ghigno + in faccia per infiammare qualche discussione: vi sentirete purificati. Solo + cercate di non cagare troppo vicino a casa. + +6) Perché io? +------------- + +Dato che la vostra responsabilità principale è quella di prendervi le colpe +d'altri, e rendere dolorosamente ovvio a tutti che siete degli incompetenti, +la domanda naturale che ne segue sarà : perché dovrei fare tutto ciò? + +Innanzitutto, potreste diventare o no popolari al punto da avere la fila di +ragazzine (o ragazzini, evitiamo pregiudizi o sessismo) che gridano e bussano +alla porta del vostro camerino, ma comunque **proverete** un immenso senso di +realizzazione personale dall'essere "in carica". Dimenticate il fatto che voi +state discutendo con tutti e che cercate di inseguirli il più velocemente che +potete. Tutti continueranno a pensare che voi siete la persona in carica. + +È un bel lavoro se riuscite ad adattarlo a voi. -- cgit v1.2.3 From 4951d27b099bd24f87a093f67d91618742bd3a65 Mon Sep 17 00:00:00 2001 From: Bumsik Kim Date: Fri, 3 Apr 2020 12:15:07 +0900 Subject: watchdog: clarify that stop() is optional The commit d0684c8a9354 ("watchdog: Make stop function optional") made stop function not mandatory, but the comments and the doc weren't reflected. Fix it to clarify. 
Signed-off-by: Bumsik Kim Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200403031507.63487-1-k.bumsik@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/watchdog/convert_drivers_to_kernel_api.rst | 2 +- Documentation/watchdog/watchdog-kernel-api.rst | 2 +- include/linux/watchdog.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst index dd934cc08e40..51b999b5551a 100644 --- a/Documentation/watchdog/convert_drivers_to_kernel_api.rst +++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst @@ -115,7 +115,7 @@ Add the watchdog operations --------------------------- All possible callbacks are defined in 'struct watchdog_ops'. You can find it -explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and +explained in 'watchdog-kernel-api.txt' in this directory. start() and owner must be set, the rest are optional. You will easily find corresponding functions in the old driver. 
Note that you will now get a pointer to the watchdog_device as a parameter to these functions, so you probably have to diff --git a/Documentation/watchdog/watchdog-kernel-api.rst b/Documentation/watchdog/watchdog-kernel-api.rst index 864edbe932c1..068a55ee0d4a 100644 --- a/Documentation/watchdog/watchdog-kernel-api.rst +++ b/Documentation/watchdog/watchdog-kernel-api.rst @@ -123,8 +123,8 @@ The list of watchdog operations is defined as:: struct module *owner; /* mandatory operations */ int (*start)(struct watchdog_device *); - int (*stop)(struct watchdog_device *); /* optional operations */ + int (*stop)(struct watchdog_device *); int (*ping)(struct watchdog_device *); unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 417d9f37077a..1464ce6ffa31 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -37,15 +37,15 @@ struct watchdog_governor; * * The watchdog_ops structure contains a list of low-level operations * that control a watchdog device. It also contains the module that owns - * these operations. The start and stop function are mandatory, all other + * these operations. The start function is mandatory, all other * functions are optional. 
*/ struct watchdog_ops { struct module *owner; /* mandatory operations */ int (*start)(struct watchdog_device *); - int (*stop)(struct watchdog_device *); /* optional operations */ + int (*stop)(struct watchdog_device *); int (*ping)(struct watchdog_device *); unsigned int (*status)(struct watchdog_device *); int (*set_timeout)(struct watchdog_device *, unsigned int); -- cgit v1.2.3 From 90c165f0de3adad4719e65ab0c31d59edf5bd481 Mon Sep 17 00:00:00 2001 From: Ricardo Cañuelo Date: Fri, 3 Apr 2020 11:36:17 +0200 Subject: docs: pr_*() kerneldocs and basic printk docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add kerneldocs comments to the pr_*() macros in printk.h. Add a new rst node in the core-api manual describing the basic usage of printk and the related macro aliases. Signed-off-by: Ricardo Cañuelo Link: https://lore.kernel.org/r/20200403093617.18003-1-ricardo.canuelo@collabora.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/index.rst | 1 + Documentation/core-api/printk-basics.rst | 115 ++++++++++++++++++++++++++++++ Documentation/core-api/printk-formats.rst | 2 + include/linux/printk.h | 112 +++++++++++++++++++++++++---- 4 files changed, 218 insertions(+), 12 deletions(-) create mode 100644 Documentation/core-api/printk-basics.rst diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 0897ad12c119..49e3da910d9e 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -18,6 +18,7 @@ it. kernel-api workqueue + printk-basics printk-formats symbol-namespaces diff --git a/Documentation/core-api/printk-basics.rst b/Documentation/core-api/printk-basics.rst new file mode 100644 index 000000000000..563a9ce5fe1d --- /dev/null +++ b/Documentation/core-api/printk-basics.rst @@ -0,0 +1,115 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +=========================== +Message logging with printk +=========================== + +printk() is one of the most widely known functions in the Linux kernel. It's the +standard tool we have for printing messages and usually the most basic way of +tracing and debugging. If you're familiar with printf(3) you can tell printk() +is based on it, although it has some functional differences: + + - printk() messages can specify a log level. + + - the format string, while largely compatible with C99, doesn't follow the + exact same specification. It has some extensions and a few limitations + (no ``%n`` or floating point conversion specifiers). See :ref:`How to get + printk format specifiers right `. + +All printk() messages are printed to the kernel log buffer, which is a ring +buffer exported to userspace through /dev/kmsg. The usual way to read it is +using ``dmesg``. + +printk() is typically used like this:: + + printk(KERN_INFO "Message: %s\n", arg); + +where ``KERN_INFO`` is the log level (note that it's concatenated to the format +string, the log level is not a separate argument). 
The available log levels are: + ++----------------+--------+-----------------------------------------------+ +| Name | String | Alias function | ++================+========+===============================================+ +| KERN_EMERG | "0" | pr_emerg() | ++----------------+--------+-----------------------------------------------+ +| KERN_ALERT | "1" | pr_alert() | ++----------------+--------+-----------------------------------------------+ +| KERN_CRIT | "2" | pr_crit() | ++----------------+--------+-----------------------------------------------+ +| KERN_ERR | "3" | pr_err() | ++----------------+--------+-----------------------------------------------+ +| KERN_WARNING | "4" | pr_warn() | ++----------------+--------+-----------------------------------------------+ +| KERN_NOTICE | "5" | pr_notice() | ++----------------+--------+-----------------------------------------------+ +| KERN_INFO | "6" | pr_info() | ++----------------+--------+-----------------------------------------------+ +| KERN_DEBUG | "7" | pr_debug() and pr_devel() if DEBUG is defined | ++----------------+--------+-----------------------------------------------+ +| KERN_DEFAULT | "" | | ++----------------+--------+-----------------------------------------------+ +| KERN_CONT | "c" | pr_cont() | ++----------------+--------+-----------------------------------------------+ + + +The log level specifies the importance of a message. The kernel decides whether +to show the message immediately (printing it to the current console) depending +on its log level and the current *console_loglevel* (a kernel variable). If the +message priority is higher (lower log level value) than the *console_loglevel* +the message will be printed to the console. + +If the log level is omitted, the message is printed with ``KERN_DEFAULT`` +level. 
+ +You can check the current *console_loglevel* with:: + + $ cat /proc/sys/kernel/printk + 4 4 1 7 + +The result shows the *current*, *default*, *minimum* and *boot-time-default* log +levels. + +To change the current console_loglevel simply write the desired level to +``/proc/sys/kernel/printk``. For example, to print all messages to the console:: + + # echo 8 > /proc/sys/kernel/printk + +Another way, using ``dmesg``:: + + # dmesg -n 5 + +sets the console_loglevel to print KERN_WARNING (4) or more severe messages to +console. See ``dmesg(1)`` for more information. + +As an alternative to printk() you can use the ``pr_*()`` aliases for +logging. This family of macros embed the log level in the macro names. For +example:: + + pr_info("Info message no. %d\n", msg_num); + +prints a ``KERN_INFO`` message. + +Besides being more concise than the equivalent printk() calls, they can use a +common definition for the format string through the pr_fmt() macro. For +instance, defining this at the top of a source file (before any ``#include`` +directive):: + + #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +would prefix every pr_*() message in that file with the module and function name +that originated the message. + +For debugging purposes there are also two conditionally-compiled macros: +pr_debug() and pr_devel(), which are compiled-out unless ``DEBUG`` (or +also ``CONFIG_DYNAMIC_DEBUG`` in the case of pr_debug()) is defined. + + +Function reference +================== + +.. kernel-doc:: kernel/printk/printk.c + :functions: printk + +.. 
kernel-doc:: include/linux/printk.h + :functions: pr_emerg pr_alert pr_crit pr_err pr_warn pr_notice pr_info + pr_fmt pr_debug pr_devel pr_cont diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index 8ebe46b1af39..1e3838652348 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -2,6 +2,8 @@ How to get printk format specifiers right ========================================= +.. _printk-specifiers: + :Author: Randy Dunlap :Author: Andrew Murray diff --git a/include/linux/printk.h b/include/linux/printk.h index e061635e0409..ccea5d4a509e 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -279,39 +279,116 @@ static inline void printk_safe_flush_on_panic(void) extern int kptr_restrict; +/** + * pr_fmt - used by the pr_*() macros to generate the printk format string + * @fmt: format string passed from a pr_*() macro + * + * This macro can be used to generate a unified format string for pr_*() + * macros. A common use is to prefix all pr_*() messages in a file with a common + * string. For example, defining this at the top of a source file: + * + * #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + * + * would prefix all pr_info, pr_emerg... messages in the file with the module + * name. + */ #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif -/* - * These can be used to print at the various log levels. - * All of these will print unconditionally, although note that pr_debug() - * and other debug macros are compiled out unless either DEBUG is defined - * or CONFIG_DYNAMIC_DEBUG is set. +/** + * pr_emerg - Print an emergency-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_EMERG loglevel. It uses pr_fmt() to + * generate the format string. */ #define pr_emerg(fmt, ...) 
\ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_alert - Print an alert-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_ALERT loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_alert(fmt, ...) \ printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_crit - Print a critical-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_CRIT loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_crit(fmt, ...) \ printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_err - Print an error-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_ERR loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_err(fmt, ...) \ printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_warn - Print a warning-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_WARNING loglevel. It uses pr_fmt() + * to generate the format string. + */ #define pr_warn(fmt, ...) \ printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_notice - Print a notice-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_NOTICE loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_notice(fmt, ...) \ printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +/** + * pr_info - Print an info-level message + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_INFO loglevel. It uses pr_fmt() to + * generate the format string. + */ #define pr_info(fmt, ...) 
\ printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) -/* - * Like KERN_CONT, pr_cont() should only be used when continuing - * a line with no newline ('\n') enclosed. Otherwise it defaults - * back to KERN_DEFAULT. + +/** + * pr_cont - Continues a previous log message in the same line. + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_CONT loglevel. It should only be + * used when continuing a log message with no newline ('\n') enclosed. Otherwise + * it defaults back to KERN_DEFAULT loglevel. */ #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) -/* pr_devel() should produce zero code unless DEBUG is defined */ +/** + * pr_devel - Print a debug-level message conditionally + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to a printk with KERN_DEBUG loglevel if DEBUG is + * defined. Otherwise it does nothing. + * + * It uses pr_fmt() to generate the format string. + */ #ifdef DEBUG #define pr_devel(fmt, ...) \ printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) @@ -325,8 +402,19 @@ extern int kptr_restrict; #if defined(CONFIG_DYNAMIC_DEBUG) #include -/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ -#define pr_debug(fmt, ...) \ +/** + * pr_debug - Print a debug-level message conditionally + * @fmt: format string + * @...: arguments for the format string + * + * This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is + * set. Otherwise, if DEBUG is defined, it's equivalent to a printk with + * KERN_DEBUG loglevel. If DEBUG is not defined it does nothing. + * + * It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses + * pr_fmt() internally). + */ +#define pr_debug(fmt, ...) \ dynamic_pr_debug(fmt, ##__VA_ARGS__) #elif defined(DEBUG) #define pr_debug(fmt, ...) 
\ -- cgit v1.2.3 From 14bbe3e33710be52f21d61253a94c5f44a696d02 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 1 Apr 2020 10:33:43 -0700 Subject: docs: Add rbtree documentation to the core-api This file is close enough to being in rst format that I didn't feel the need to alter it in any way. Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michel Lespinasse Link: https://lore.kernel.org/r/20200401173343.17472-1-willy@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/core-api/index.rst | 1 + Documentation/core-api/rbtree.rst | 429 +++++++++++++++++++++++++++++++++ Documentation/rbtree.txt | 429 --------------------------------- include/linux/rbtree.h | 2 +- include/linux/rbtree_augmented.h | 2 +- lib/Kconfig | 2 +- tools/include/linux/rbtree.h | 2 +- tools/include/linux/rbtree_augmented.h | 2 +- 8 files changed, 435 insertions(+), 434 deletions(-) create mode 100644 Documentation/core-api/rbtree.rst delete mode 100644 Documentation/rbtree.txt diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 49e3da910d9e..b29c4a07beda 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -35,6 +35,7 @@ Library functionality that is used throughout the kernel. xarray idr circular-buffers + rbtree generic-radix-tree packing timekeeping diff --git a/Documentation/core-api/rbtree.rst b/Documentation/core-api/rbtree.rst new file mode 100644 index 000000000000..523d54b60087 --- /dev/null +++ b/Documentation/core-api/rbtree.rst @@ -0,0 +1,429 @@ +================================= +Red-black Trees (rbtree) in Linux +================================= + + +:Date: January 18, 2007 +:Author: Rob Landley + +What are red-black trees, and what are they for? +------------------------------------------------ + +Red-black trees are a type of self-balancing binary search tree, used for +storing sortable key/value data pairs. 
This differs from radix trees (which +are used to efficiently store sparse arrays and thus use long integer indexes +to insert/access/delete nodes) and hash tables (which are not kept sorted to +be easily traversed in order, and must be tuned for a specific size and +hash function where rbtrees scale gracefully storing arbitrary keys). + +Red-black trees are similar to AVL trees, but provide faster real-time bounded +worst case performance for insertion and deletion (at most two rotations and +three rotations, respectively, to balance the tree), with slightly slower +(but still O(log n)) lookup time. + +To quote Linux Weekly News: + + There are a number of red-black trees in use in the kernel. + The deadline and CFQ I/O schedulers employ rbtrees to + track requests; the packet CD/DVD driver does the same. + The high-resolution timer code uses an rbtree to organize outstanding + timer requests. The ext3 filesystem tracks directory entries in a + red-black tree. Virtual memory areas (VMAs) are tracked with red-black + trees, as are epoll file descriptors, cryptographic keys, and network + packets in the "hierarchical token bucket" scheduler. + +This document covers use of the Linux rbtree implementation. For more +information on the nature and implementation of Red Black Trees, see: + + Linux Weekly News article on red-black trees + http://lwn.net/Articles/184495/ + + Wikipedia entry on red-black trees + http://en.wikipedia.org/wiki/Red-black_tree + +Linux implementation of red-black trees +--------------------------------------- + +Linux's rbtree implementation lives in the file "lib/rbtree.c". To use it, +"#include ". + +The Linux rbtree implementation is optimized for speed, and thus has one +less layer of indirection (and better cache locality) than more traditional +tree implementations. Instead of using pointers to separate rb_node and data +structures, each instance of struct rb_node is embedded in the data structure +it organizes. 
And instead of using a comparison callback function pointer, +users are expected to write their own tree search and insert functions +which call the provided rbtree functions. Locking is also left up to the +user of the rbtree code. + +Creating a new rbtree +--------------------- + +Data nodes in an rbtree tree are structures containing a struct rb_node member:: + + struct mytype { + struct rb_node node; + char *keystring; + }; + +When dealing with a pointer to the embedded struct rb_node, the containing data +structure may be accessed with the standard container_of() macro. In addition, +individual members may be accessed directly via rb_entry(node, type, member). + +At the root of each rbtree is an rb_root structure, which is initialized to be +empty via: + + struct rb_root mytree = RB_ROOT; + +Searching for a value in an rbtree +---------------------------------- + +Writing a search function for your tree is fairly straightforward: start at the +root, compare each value, and follow the left or right branch as necessary. + +Example:: + + struct mytype *my_search(struct rb_root *root, char *string) + { + struct rb_node *node = root->rb_node; + + while (node) { + struct mytype *data = container_of(node, struct mytype, node); + int result; + + result = strcmp(string, data->keystring); + + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return data; + } + return NULL; + } + +Inserting data into an rbtree +----------------------------- + +Inserting data in the tree involves first searching for the place to insert the +new node, then inserting the node and rebalancing ("recoloring") the tree. + +The search for insertion differs from the previous search by finding the +location of the pointer on which to graft the new node. The new node also +needs a link to its parent node for rebalancing purposes. 
+ +Example:: + + int my_insert(struct rb_root *root, struct mytype *data) + { + struct rb_node **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + struct mytype *this = container_of(*new, struct mytype, node); + int result = strcmp(data->keystring, this->keystring); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return FALSE; + } + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + + return TRUE; + } + +Removing or replacing existing data in an rbtree +------------------------------------------------ + +To remove an existing node from a tree, call:: + + void rb_erase(struct rb_node *victim, struct rb_root *tree); + +Example:: + + struct mytype *data = mysearch(&mytree, "walrus"); + + if (data) { + rb_erase(&data->node, &mytree); + myfree(data); + } + +To replace an existing node in a tree with a new one with the same key, call:: + + void rb_replace_node(struct rb_node *old, struct rb_node *new, + struct rb_root *tree); + +Replacing a node this way does not re-sort the tree: If the new node doesn't +have the same key as the old node, the rbtree will probably become corrupted. + +Iterating through the elements stored in an rbtree (in sort order) +------------------------------------------------------------------ + +Four functions are provided for iterating through an rbtree's contents in +sorted order. 
These work on arbitrary trees, and should not need to be +modified or wrapped (except for locking purposes):: + + struct rb_node *rb_first(struct rb_root *tree); + struct rb_node *rb_last(struct rb_root *tree); + struct rb_node *rb_next(struct rb_node *node); + struct rb_node *rb_prev(struct rb_node *node); + +To start iterating, call rb_first() or rb_last() with a pointer to the root +of the tree, which will return a pointer to the node structure contained in +the first or last element in the tree. To continue, fetch the next or previous +node by calling rb_next() or rb_prev() on the current node. This will return +NULL when there are no more nodes left. + +The iterator functions return a pointer to the embedded struct rb_node, from +which the containing data structure may be accessed with the container_of() +macro, and individual members may be accessed directly via +rb_entry(node, type, member). + +Example:: + + struct rb_node *node; + for (node = rb_first(&mytree); node; node = rb_next(node)) + printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); + +Cached rbtrees +-------------- + +Computing the leftmost (smallest) node is quite a common task for binary +search trees, such as for traversals or users relying on a the particular +order for their own logic. To this end, users can use 'struct rb_root_cached' +to optimize O(logN) rb_first() calls to a simple pointer fetch avoiding +potentially expensive tree iterations. This is done at negligible runtime +overhead for maintanence; albeit larger memory footprint. + +Similar to the rb_root structure, cached rbtrees are initialized to be +empty via:: + + struct rb_root_cached mytree = RB_ROOT_CACHED; + +Cached rbtree is simply a regular rb_root with an extra pointer to cache the +leftmost node. 
This allows rb_root_cached to exist wherever rb_root does, +which permits augmented trees to be supported as well as only a few extra +interfaces:: + + struct rb_node *rb_first_cached(struct rb_root_cached *tree); + void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool); + void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); + +Both insert and erase calls have their respective counterpart of augmented +trees:: + + void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *, + bool, struct rb_augment_callbacks *); + void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *, + struct rb_augment_callbacks *); + + +Support for Augmented rbtrees +----------------------------- + +Augmented rbtree is an rbtree with "some" additional data stored in +each node, where the additional data for node N must be a function of +the contents of all nodes in the subtree rooted at N. This data can +be used to augment some new functionality to rbtree. Augmented rbtree +is an optional feature built on top of basic rbtree infrastructure. +An rbtree user who wants this feature will have to call the augmentation +functions with the user provided augmentation callback when inserting +and erasing nodes. + +C files implementing augmented rbtree manipulation must include + instead of . Note that +linux/rbtree_augmented.h exposes some rbtree implementations details +you are not expected to rely on; please stick to the documented APIs +there and do not include from header files +either so as to minimize chances of your users accidentally relying on +such implementation details. + +On insertion, the user must update the augmented information on the path +leading to the inserted node, then call rb_link_node() as usual and +rb_augment_inserted() instead of the usual rb_insert_color() call. 
+If rb_augment_inserted() rebalances the rbtree, it will callback into +a user provided function to update the augmented information on the +affected subtrees. + +When erasing a node, the user must call rb_erase_augmented() instead of +rb_erase(). rb_erase_augmented() calls back into user provided functions +to updated the augmented information on affected subtrees. + +In both cases, the callbacks are provided through struct rb_augment_callbacks. +3 callbacks must be defined: + +- A propagation callback, which updates the augmented value for a given + node and its ancestors, up to a given stop point (or NULL to update + all the way to the root). + +- A copy callback, which copies the augmented value for a given subtree + to a newly assigned subtree root. + +- A tree rotation callback, which copies the augmented value for a given + subtree to a newly assigned subtree root AND recomputes the augmented + information for the former subtree root. + +The compiled code for rb_erase_augmented() may inline the propagation and +copy callbacks, which results in a large function, so each augmented rbtree +user should have a single rb_erase_augmented() call site in order to limit +compiled code size. + + +Sample usage +^^^^^^^^^^^^ + +Interval tree is an example of augmented rb tree. Reference - +"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein. +More details about interval trees: + +Classical rbtree has a single key and it cannot be directly used to store +interval ranges like [lo:hi] and do a quick lookup for any overlap with a new +lo:hi or to find whether there is an exact match for a new lo:hi. + +However, rbtree can be augmented to store such interval ranges in a structured +way making it possible to do efficient lookup and exact match. + +This "extra information" stored in each node is the maximum hi +(max_hi) value among all the nodes that are its descendants. 
This +information can be maintained at each node just be looking at the node +and its immediate children. And this will be used in O(log n) lookup +for lowest match (lowest start address among all possible matches) +with something like:: + + struct interval_tree_node * + interval_tree_first_match(struct rb_root *root, + unsigned long start, unsigned long last) + { + struct interval_tree_node *node; + + if (!root->rb_node) + return NULL; + node = rb_entry(root->rb_node, struct interval_tree_node, rb); + + while (true) { + if (node->rb.rb_left) { + struct interval_tree_node *left = + rb_entry(node->rb.rb_left, + struct interval_tree_node, rb); + if (left->__subtree_last >= start) { + /* + * Some nodes in left subtree satisfy Cond2. + * Iterate to find the leftmost such node N. + * If it also satisfies Cond1, that's the match + * we are looking for. Otherwise, there is no + * matching interval as nodes to the right of N + * can't satisfy Cond1 either. + */ + node = left; + continue; + } + } + if (node->start <= last) { /* Cond1 */ + if (node->last >= start) /* Cond2 */ + return node; /* node is leftmost match */ + if (node->rb.rb_right) { + node = rb_entry(node->rb.rb_right, + struct interval_tree_node, rb); + if (node->__subtree_last >= start) + continue; + } + } + return NULL; /* No match */ + } + } + +Insertion/removal are defined using the following augmented callbacks:: + + static inline unsigned long + compute_subtree_last(struct interval_tree_node *node) + { + unsigned long max = node->last, subtree_last; + if (node->rb.rb_left) { + subtree_last = rb_entry(node->rb.rb_left, + struct interval_tree_node, rb)->__subtree_last; + if (max < subtree_last) + max = subtree_last; + } + if (node->rb.rb_right) { + subtree_last = rb_entry(node->rb.rb_right, + struct interval_tree_node, rb)->__subtree_last; + if (max < subtree_last) + max = subtree_last; + } + return max; + } + + static void augment_propagate(struct rb_node *rb, struct rb_node *stop) + { + while (rb != stop) 
{ + struct interval_tree_node *node = + rb_entry(rb, struct interval_tree_node, rb); + unsigned long subtree_last = compute_subtree_last(node); + if (node->__subtree_last == subtree_last) + break; + node->__subtree_last = subtree_last; + rb = rb_parent(&node->rb); + } + } + + static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new) + { + struct interval_tree_node *old = + rb_entry(rb_old, struct interval_tree_node, rb); + struct interval_tree_node *new = + rb_entry(rb_new, struct interval_tree_node, rb); + + new->__subtree_last = old->__subtree_last; + } + + static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new) + { + struct interval_tree_node *old = + rb_entry(rb_old, struct interval_tree_node, rb); + struct interval_tree_node *new = + rb_entry(rb_new, struct interval_tree_node, rb); + + new->__subtree_last = old->__subtree_last; + old->__subtree_last = compute_subtree_last(old); + } + + static const struct rb_augment_callbacks augment_callbacks = { + augment_propagate, augment_copy, augment_rotate + }; + + void interval_tree_insert(struct interval_tree_node *node, + struct rb_root *root) + { + struct rb_node **link = &root->rb_node, *rb_parent = NULL; + unsigned long start = node->start, last = node->last; + struct interval_tree_node *parent; + + while (*link) { + rb_parent = *link; + parent = rb_entry(rb_parent, struct interval_tree_node, rb); + if (parent->__subtree_last < last) + parent->__subtree_last = last; + if (start < parent->start) + link = &parent->rb.rb_left; + else + link = &parent->rb.rb_right; + } + + node->__subtree_last = last; + rb_link_node(&node->rb, rb_parent, link); + rb_insert_augmented(&node->rb, root, &augment_callbacks); + } + + void interval_tree_remove(struct interval_tree_node *node, + struct rb_root *root) + { + rb_erase_augmented(&node->rb, root, &augment_callbacks); + } diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt deleted file mode 100644 index 523d54b60087..000000000000 --- 
a/Documentation/rbtree.txt +++ /dev/null @@ -1,429 +0,0 @@ -================================= -Red-black Trees (rbtree) in Linux -================================= - - -:Date: January 18, 2007 -:Author: Rob Landley - -What are red-black trees, and what are they for? ------------------------------------------------- - -Red-black trees are a type of self-balancing binary search tree, used for -storing sortable key/value data pairs. This differs from radix trees (which -are used to efficiently store sparse arrays and thus use long integer indexes -to insert/access/delete nodes) and hash tables (which are not kept sorted to -be easily traversed in order, and must be tuned for a specific size and -hash function where rbtrees scale gracefully storing arbitrary keys). - -Red-black trees are similar to AVL trees, but provide faster real-time bounded -worst case performance for insertion and deletion (at most two rotations and -three rotations, respectively, to balance the tree), with slightly slower -(but still O(log n)) lookup time. - -To quote Linux Weekly News: - - There are a number of red-black trees in use in the kernel. - The deadline and CFQ I/O schedulers employ rbtrees to - track requests; the packet CD/DVD driver does the same. - The high-resolution timer code uses an rbtree to organize outstanding - timer requests. The ext3 filesystem tracks directory entries in a - red-black tree. Virtual memory areas (VMAs) are tracked with red-black - trees, as are epoll file descriptors, cryptographic keys, and network - packets in the "hierarchical token bucket" scheduler. - -This document covers use of the Linux rbtree implementation. 
For more -information on the nature and implementation of Red Black Trees, see: - - Linux Weekly News article on red-black trees - http://lwn.net/Articles/184495/ - - Wikipedia entry on red-black trees - http://en.wikipedia.org/wiki/Red-black_tree - -Linux implementation of red-black trees ---------------------------------------- - -Linux's rbtree implementation lives in the file "lib/rbtree.c". To use it, -"#include ". - -The Linux rbtree implementation is optimized for speed, and thus has one -less layer of indirection (and better cache locality) than more traditional -tree implementations. Instead of using pointers to separate rb_node and data -structures, each instance of struct rb_node is embedded in the data structure -it organizes. And instead of using a comparison callback function pointer, -users are expected to write their own tree search and insert functions -which call the provided rbtree functions. Locking is also left up to the -user of the rbtree code. - -Creating a new rbtree ---------------------- - -Data nodes in an rbtree tree are structures containing a struct rb_node member:: - - struct mytype { - struct rb_node node; - char *keystring; - }; - -When dealing with a pointer to the embedded struct rb_node, the containing data -structure may be accessed with the standard container_of() macro. In addition, -individual members may be accessed directly via rb_entry(node, type, member). - -At the root of each rbtree is an rb_root structure, which is initialized to be -empty via: - - struct rb_root mytree = RB_ROOT; - -Searching for a value in an rbtree ----------------------------------- - -Writing a search function for your tree is fairly straightforward: start at the -root, compare each value, and follow the left or right branch as necessary. 
- -Example:: - - struct mytype *my_search(struct rb_root *root, char *string) - { - struct rb_node *node = root->rb_node; - - while (node) { - struct mytype *data = container_of(node, struct mytype, node); - int result; - - result = strcmp(string, data->keystring); - - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else - return data; - } - return NULL; - } - -Inserting data into an rbtree ------------------------------ - -Inserting data in the tree involves first searching for the place to insert the -new node, then inserting the node and rebalancing ("recoloring") the tree. - -The search for insertion differs from the previous search by finding the -location of the pointer on which to graft the new node. The new node also -needs a link to its parent node for rebalancing purposes. - -Example:: - - int my_insert(struct rb_root *root, struct mytype *data) - { - struct rb_node **new = &(root->rb_node), *parent = NULL; - - /* Figure out where to put new node */ - while (*new) { - struct mytype *this = container_of(*new, struct mytype, node); - int result = strcmp(data->keystring, this->keystring); - - parent = *new; - if (result < 0) - new = &((*new)->rb_left); - else if (result > 0) - new = &((*new)->rb_right); - else - return FALSE; - } - - /* Add new node and rebalance tree. 
*/ - rb_link_node(&data->node, parent, new); - rb_insert_color(&data->node, root); - - return TRUE; - } - -Removing or replacing existing data in an rbtree ------------------------------------------------- - -To remove an existing node from a tree, call:: - - void rb_erase(struct rb_node *victim, struct rb_root *tree); - -Example:: - - struct mytype *data = mysearch(&mytree, "walrus"); - - if (data) { - rb_erase(&data->node, &mytree); - myfree(data); - } - -To replace an existing node in a tree with a new one with the same key, call:: - - void rb_replace_node(struct rb_node *old, struct rb_node *new, - struct rb_root *tree); - -Replacing a node this way does not re-sort the tree: If the new node doesn't -have the same key as the old node, the rbtree will probably become corrupted. - -Iterating through the elements stored in an rbtree (in sort order) ------------------------------------------------------------------- - -Four functions are provided for iterating through an rbtree's contents in -sorted order. These work on arbitrary trees, and should not need to be -modified or wrapped (except for locking purposes):: - - struct rb_node *rb_first(struct rb_root *tree); - struct rb_node *rb_last(struct rb_root *tree); - struct rb_node *rb_next(struct rb_node *node); - struct rb_node *rb_prev(struct rb_node *node); - -To start iterating, call rb_first() or rb_last() with a pointer to the root -of the tree, which will return a pointer to the node structure contained in -the first or last element in the tree. To continue, fetch the next or previous -node by calling rb_next() or rb_prev() on the current node. This will return -NULL when there are no more nodes left. - -The iterator functions return a pointer to the embedded struct rb_node, from -which the containing data structure may be accessed with the container_of() -macro, and individual members may be accessed directly via -rb_entry(node, type, member). 
- -Example:: - - struct rb_node *node; - for (node = rb_first(&mytree); node; node = rb_next(node)) - printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); - -Cached rbtrees --------------- - -Computing the leftmost (smallest) node is quite a common task for binary -search trees, such as for traversals or users relying on a the particular -order for their own logic. To this end, users can use 'struct rb_root_cached' -to optimize O(logN) rb_first() calls to a simple pointer fetch avoiding -potentially expensive tree iterations. This is done at negligible runtime -overhead for maintanence; albeit larger memory footprint. - -Similar to the rb_root structure, cached rbtrees are initialized to be -empty via:: - - struct rb_root_cached mytree = RB_ROOT_CACHED; - -Cached rbtree is simply a regular rb_root with an extra pointer to cache the -leftmost node. This allows rb_root_cached to exist wherever rb_root does, -which permits augmented trees to be supported as well as only a few extra -interfaces:: - - struct rb_node *rb_first_cached(struct rb_root_cached *tree); - void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool); - void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); - -Both insert and erase calls have their respective counterpart of augmented -trees:: - - void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *, - bool, struct rb_augment_callbacks *); - void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *, - struct rb_augment_callbacks *); - - -Support for Augmented rbtrees ------------------------------ - -Augmented rbtree is an rbtree with "some" additional data stored in -each node, where the additional data for node N must be a function of -the contents of all nodes in the subtree rooted at N. This data can -be used to augment some new functionality to rbtree. Augmented rbtree -is an optional feature built on top of basic rbtree infrastructure. 
-An rbtree user who wants this feature will have to call the augmentation -functions with the user provided augmentation callback when inserting -and erasing nodes. - -C files implementing augmented rbtree manipulation must include - instead of . Note that -linux/rbtree_augmented.h exposes some rbtree implementations details -you are not expected to rely on; please stick to the documented APIs -there and do not include from header files -either so as to minimize chances of your users accidentally relying on -such implementation details. - -On insertion, the user must update the augmented information on the path -leading to the inserted node, then call rb_link_node() as usual and -rb_augment_inserted() instead of the usual rb_insert_color() call. -If rb_augment_inserted() rebalances the rbtree, it will callback into -a user provided function to update the augmented information on the -affected subtrees. - -When erasing a node, the user must call rb_erase_augmented() instead of -rb_erase(). rb_erase_augmented() calls back into user provided functions -to updated the augmented information on affected subtrees. - -In both cases, the callbacks are provided through struct rb_augment_callbacks. -3 callbacks must be defined: - -- A propagation callback, which updates the augmented value for a given - node and its ancestors, up to a given stop point (or NULL to update - all the way to the root). - -- A copy callback, which copies the augmented value for a given subtree - to a newly assigned subtree root. - -- A tree rotation callback, which copies the augmented value for a given - subtree to a newly assigned subtree root AND recomputes the augmented - information for the former subtree root. - -The compiled code for rb_erase_augmented() may inline the propagation and -copy callbacks, which results in a large function, so each augmented rbtree -user should have a single rb_erase_augmented() call site in order to limit -compiled code size. 
- - -Sample usage -^^^^^^^^^^^^ - -Interval tree is an example of augmented rb tree. Reference - -"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein. -More details about interval trees: - -Classical rbtree has a single key and it cannot be directly used to store -interval ranges like [lo:hi] and do a quick lookup for any overlap with a new -lo:hi or to find whether there is an exact match for a new lo:hi. - -However, rbtree can be augmented to store such interval ranges in a structured -way making it possible to do efficient lookup and exact match. - -This "extra information" stored in each node is the maximum hi -(max_hi) value among all the nodes that are its descendants. This -information can be maintained at each node just be looking at the node -and its immediate children. And this will be used in O(log n) lookup -for lowest match (lowest start address among all possible matches) -with something like:: - - struct interval_tree_node * - interval_tree_first_match(struct rb_root *root, - unsigned long start, unsigned long last) - { - struct interval_tree_node *node; - - if (!root->rb_node) - return NULL; - node = rb_entry(root->rb_node, struct interval_tree_node, rb); - - while (true) { - if (node->rb.rb_left) { - struct interval_tree_node *left = - rb_entry(node->rb.rb_left, - struct interval_tree_node, rb); - if (left->__subtree_last >= start) { - /* - * Some nodes in left subtree satisfy Cond2. - * Iterate to find the leftmost such node N. - * If it also satisfies Cond1, that's the match - * we are looking for. Otherwise, there is no - * matching interval as nodes to the right of N - * can't satisfy Cond1 either. 
- */ - node = left; - continue; - } - } - if (node->start <= last) { /* Cond1 */ - if (node->last >= start) /* Cond2 */ - return node; /* node is leftmost match */ - if (node->rb.rb_right) { - node = rb_entry(node->rb.rb_right, - struct interval_tree_node, rb); - if (node->__subtree_last >= start) - continue; - } - } - return NULL; /* No match */ - } - } - -Insertion/removal are defined using the following augmented callbacks:: - - static inline unsigned long - compute_subtree_last(struct interval_tree_node *node) - { - unsigned long max = node->last, subtree_last; - if (node->rb.rb_left) { - subtree_last = rb_entry(node->rb.rb_left, - struct interval_tree_node, rb)->__subtree_last; - if (max < subtree_last) - max = subtree_last; - } - if (node->rb.rb_right) { - subtree_last = rb_entry(node->rb.rb_right, - struct interval_tree_node, rb)->__subtree_last; - if (max < subtree_last) - max = subtree_last; - } - return max; - } - - static void augment_propagate(struct rb_node *rb, struct rb_node *stop) - { - while (rb != stop) { - struct interval_tree_node *node = - rb_entry(rb, struct interval_tree_node, rb); - unsigned long subtree_last = compute_subtree_last(node); - if (node->__subtree_last == subtree_last) - break; - node->__subtree_last = subtree_last; - rb = rb_parent(&node->rb); - } - } - - static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new) - { - struct interval_tree_node *old = - rb_entry(rb_old, struct interval_tree_node, rb); - struct interval_tree_node *new = - rb_entry(rb_new, struct interval_tree_node, rb); - - new->__subtree_last = old->__subtree_last; - } - - static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new) - { - struct interval_tree_node *old = - rb_entry(rb_old, struct interval_tree_node, rb); - struct interval_tree_node *new = - rb_entry(rb_new, struct interval_tree_node, rb); - - new->__subtree_last = old->__subtree_last; - old->__subtree_last = compute_subtree_last(old); - } - - static const struct 
rb_augment_callbacks augment_callbacks = { - augment_propagate, augment_copy, augment_rotate - }; - - void interval_tree_insert(struct interval_tree_node *node, - struct rb_root *root) - { - struct rb_node **link = &root->rb_node, *rb_parent = NULL; - unsigned long start = node->start, last = node->last; - struct interval_tree_node *parent; - - while (*link) { - rb_parent = *link; - parent = rb_entry(rb_parent, struct interval_tree_node, rb); - if (parent->__subtree_last < last) - parent->__subtree_last = last; - if (start < parent->start) - link = &parent->rb.rb_left; - else - link = &parent->rb.rb_right; - } - - node->__subtree_last = last; - rb_link_node(&node->rb, rb_parent, link); - rb_insert_augmented(&node->rb, root, &augment_callbacks); - } - - void interval_tree_remove(struct interval_tree_node *node, - struct rb_root *root) - { - rb_erase_augmented(&node->rb, root, &augment_callbacks); - } diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 1fd61a9af45c..d7db17996322 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -11,7 +11,7 @@ I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... - See Documentation/rbtree.txt for documentation and samples. + See Documentation/core-api/rbtree.rst for documentation and samples. */ #ifndef _LINUX_RBTREE_H diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 724b0d036b57..d1c53e9d8c75 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -21,7 +21,7 @@ * rb_insert_augmented() and rb_erase_augmented() are intended to be public. * The rest are implementation details you are not expected to depend on. * - * See Documentation/rbtree.txt for documentation and samples. + * See Documentation/core-api/rbtree.rst for documentation and samples. 
*/ struct rb_augment_callbacks { diff --git a/lib/Kconfig b/lib/Kconfig index 5d53f9609c25..8005e36c3459 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -427,7 +427,7 @@ config INTERVAL_TREE See: - Documentation/rbtree.txt + Documentation/core-api/rbtree.rst for more information. diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h index e03b1ea23e0e..30dd21f976c3 100644 --- a/tools/include/linux/rbtree.h +++ b/tools/include/linux/rbtree.h @@ -11,7 +11,7 @@ I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... - See Documentation/rbtree.txt for documentation and samples. + See Documentation/core-api/rbtree.rst for documentation and samples. */ #ifndef __TOOLS_LINUX_PERF_RBTREE_H diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 381aa948610d..570bb9794421 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -23,7 +23,7 @@ * rb_insert_augmented() and rb_erase_augmented() are intended to be public. * The rest are implementation details you are not expected to depend on. * - * See Documentation/rbtree.txt for documentation and samples. + * See Documentation/core-api/rbtree.rst for documentation and samples. 
*/ struct rb_augment_callbacks { -- cgit v1.2.3 From 5af438d0dcdbdbc1da4f3abcdf08fe9b1291ca02 Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Sun, 5 Apr 2020 23:06:47 +0200 Subject: doc:it_IT: add RISC-V maintenance guidelines Add translation for the RISC-V maintenance guidelines as part of the translation of things related to "process/" Signed-off-by: Federico Vaga Link: https://lore.kernel.org/r/20200405210647.24991-1-federico.vaga@vaga.pv.it Signed-off-by: Jonathan Corbet --- Documentation/translations/it_IT/process/index.rst | 1 + .../translations/it_IT/riscv/patch-acceptance.rst | 40 ++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 Documentation/translations/it_IT/riscv/patch-acceptance.rst diff --git a/Documentation/translations/it_IT/process/index.rst b/Documentation/translations/it_IT/process/index.rst index 012de0f3154a..c4c867132c88 100644 --- a/Documentation/translations/it_IT/process/index.rst +++ b/Documentation/translations/it_IT/process/index.rst @@ -59,6 +59,7 @@ perché non si è trovato un posto migliore. magic-number volatile-considered-harmful clang-format + ../riscv/patch-acceptance .. only:: subproject and html diff --git a/Documentation/translations/it_IT/riscv/patch-acceptance.rst b/Documentation/translations/it_IT/riscv/patch-acceptance.rst new file mode 100644 index 000000000000..edf67252b3fb --- /dev/null +++ b/Documentation/translations/it_IT/riscv/patch-acceptance.rst @@ -0,0 +1,40 @@ +.. include:: ../disclaimer-ita.rst + +:Original: :doc:`../../../riscv/patch-acceptance` +:Translator: Federico Vaga + +arch/riscv linee guida alla manutenzione per gli sviluppatori +============================================================= + +Introduzione +------------ + +L'insieme di istruzioni RISC-V sono sviluppate in modo aperto: le +bozze in fase di sviluppo sono disponibili a tutti per essere +revisionate e per essere sperimentare nelle implementazioni. 
Le bozze +dei nuovi moduli o estensioni possono cambiare in fase di sviluppo - a +volte in modo incompatibile rispetto a bozze precedenti. Questa +flessibilità può portare a dei problemi di manutenzione per il +supporto RISC-V nel kernel Linux. I manutentori Linux non amano +l'abbandono del codice, e il processo di sviluppo del kernel +preferisce codice ben revisionato e testato rispetto a quello +sperimentale. Desideriamo estendere questi stessi principi al codice +relativo all'architettura RISC-V che verrà accettato per l'inclusione +nel kernel. + +In aggiunta alla lista delle verifiche da fare prima di inviare una patch +------------------------------------------------------------------------- + +Accetteremo le patch per un nuovo modulo o estensione se la fondazione +RISC-V li classifica come "Frozen" o "Ratified". (Ovviamente, gli +sviluppatori sono liberi di mantenere una copia del kernel Linux +contenente il codice per una bozza di estensione). + +In aggiunta, la specifica RISC-V permette agli implementatori di +creare le proprie estensioni. Queste estensioni non passano +attraverso il processo di revisione della fondazione RISC-V. Per +questo motivo, al fine di evitare complicazioni o problemi di +prestazioni, accetteremo patch solo per quelle estensioni che sono +state ufficialmente accettate dalla fondazione RISC-V. (Ovviamente, +gli implementatori sono liberi di mantenere una copia del kernel Linux +contenente il codice per queste specifiche estensioni). -- cgit v1.2.3 From 7b9121040d83eb9e332f7dbc140eca17643d3586 Mon Sep 17 00:00:00 2001 From: Adrian Freund Date: Tue, 7 Apr 2020 15:05:25 +0200 Subject: Documentation: scheduler: fix outdated information on sched groups The documentation claims that two sched groups must not overlap. This is no longer true, as overlapping sched groups are used on NUMA systems. This change has been introduced by commit e3589f6c81e47 and was documented by an in-code comment in commit 35a566e6e8a18. 
Signed-off-by: Adrian Freund Link: https://lore.kernel.org/r/20200407130525.76663-1-adrian@freund.io Signed-off-by: Jonathan Corbet --- Documentation/scheduler/sched-domains.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst index f7504226f445..5c4b7f4f0062 100644 --- a/Documentation/scheduler/sched-domains.rst +++ b/Documentation/scheduler/sched-domains.rst @@ -19,10 +19,12 @@ CPUs". Each scheduling domain must have one or more CPU groups (struct sched_group) which are organised as a circular one way linked list from the ->groups pointer. The union of cpumasks of these groups MUST be the same as the -domain's span. The intersection of cpumasks from any two of these groups -MUST be the empty set. The group pointed to by the ->groups pointer MUST -contain the CPU to which the domain belongs. Groups may be shared among -CPUs as they contain read only data after they have been set up. +domain's span. The group pointed to by the ->groups pointer MUST contain the CPU +to which the domain belongs. Groups may be shared among CPUs as they contain +read only data after they have been set up. The intersection of cpumasks from +any two of these groups may be non empty. If this is the case the SD_OVERLAP +flag is set on the corresponding scheduling domain and its groups may not be +shared between CPUs. Balancing within a sched domain occurs between groups. That is, each group is treated as one entity. 
The load of a group is defined as the sum of the -- cgit v1.2.3 From 2d5694796b6b4afe188270388813c6c89e4d3f46 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Tue, 28 Apr 2020 15:32:25 +0200 Subject: Documentation: x86: fix space instead of tab in uefi doc Signed-off-by: Flavio Suligoi Link: https://lore.kernel.org/r/1588080745-21999-1-git-send-email-f.suligoi@asem.it Signed-off-by: Jonathan Corbet --- Documentation/x86/x86_64/uefi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/x86/x86_64/uefi.rst b/Documentation/x86/x86_64/uefi.rst index 88c3ba32546f..3b894103a734 100644 --- a/Documentation/x86/x86_64/uefi.rst +++ b/Documentation/x86/x86_64/uefi.rst @@ -36,7 +36,7 @@ Mechanics elilo bootloader with x86_64 support, elilo configuration file, kernel image built in first step and corresponding - initrd. Instructions on building elilo and its dependencies + initrd. Instructions on building elilo and its dependencies can be found in the elilo sourceforge project. - Boot to EFI shell and invoke elilo choosing the kernel image built -- cgit v1.2.3 From 08ce0c1e1116080c2fe13b2dc2a05b884d710ef7 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 27 Apr 2020 23:44:40 -0300 Subject: mailmap: Add entry for Leonardo Bras Add an entry to connect my email addresses. Signed-off-by: Leonardo Bras Link: https://lore.kernel.org/r/20200428024439.215806-1-leobras.c@gmail.com Signed-off-by: Jonathan Corbet --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index db3754a41018..4600dcfed0d3 100644 --- a/.mailmap +++ b/.mailmap @@ -152,6 +152,7 @@ Krzysztof Kozlowski Kuninori Morimoto Leon Romanovsky Leon Romanovsky +Leonardo Bras Leonid I Ananiev Linas Vepstas Linus Lüssing -- cgit v1.2.3 From b529c06f9dc74a7ad8cb875e840967737d0455f3 Mon Sep 17 00:00:00 2001 From: Juan Manuel Méndez Rey Date: Sun, 26 Apr 2020 03:52:50 +0200 Subject: Update the documentation referencing Plan 9 from User Space. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The page originally referenced for checking out Plan9 applications and libraries has been missing for quite some time and the development is carried out in github and documented on this new site. Signed-off-by: Juan Manuel Méndez Rey Link: https://lore.kernel.org/r/20200426015250.GA35090@camelot Signed-off-by: Jonathan Corbet --- Documentation/filesystems/9p.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst index 671fef39a802..2995279ddc24 100644 --- a/Documentation/filesystems/9p.rst +++ b/Documentation/filesystems/9p.rst @@ -192,4 +192,4 @@ For more information on the Plan 9 Operating System check out http://plan9.bell-labs.com/plan9 For information on Plan 9 from User Space (Plan 9 applications and libraries -ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9 +ported to Linux/BSD/OSX/etc) check out https://9fans.github.io/plan9port/ -- cgit v1.2.3 From 6feb76dbd14a3addce915f251580f7b89d68d356 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 25 Apr 2020 12:06:16 +0200 Subject: Documentation: zh_CN: convert to use i2c_new_client_device() Move away from the deprecated API and advertise the new one. 
Signed-off-by: Wolfram Sang Reviewed-by: Alex Shi Link: https://lore.kernel.org/r/20200425100616.3363-1-wsa+renesas@sang-engineering.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/video4linux/v4l2-framework.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt index 9c39ee58ea50..a96abcdec777 100644 --- a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt +++ b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt @@ -488,7 +488,7 @@ struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter, 这个函数会加载给定的模块(如果没有模块需要加载,可以为 NULL), 并用给定的 i2c 适配器结构体指针(i2c_adapter)和 器件地址(chip/address) -作为参数调用 i2c_new_device()。如果一切顺利,则就在 v4l2_device +作为参数调用 i2c_new_client_device()。如果一切顺利,则就在 v4l2_device 中注册了子设备。 你也可以利用 v4l2_i2c_new_subdev()的最后一个参数,传递一个可能的 -- cgit v1.2.3 From 920af1ce1b6e713d187f178b3fe37d3e3b12381a Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 24 Apr 2020 17:35:15 +0200 Subject: docs: orangefs: fix pvfs2tab literal block Following a merge fix-up, the literal block is introduced too early; this patch merges the localhost mention with the introduction, fixing Documentation/filesystems/orangefs.rst:124: WARNING: Literal block expected; none found. 
Signed-off-by: Stephen Kitt Link: https://lore.kernel.org/r/20200424153515.134500-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/orangefs.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/filesystems/orangefs.rst b/Documentation/filesystems/orangefs.rst index e41369709c5b..463e37694250 100644 --- a/Documentation/filesystems/orangefs.rst +++ b/Documentation/filesystems/orangefs.rst @@ -119,9 +119,7 @@ it comes to that question:: /opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf -Create an /etc/pvfs2tab file:: - -Localhost is fine for your pvfs2tab file: +Create an /etc/pvfs2tab file (localhost is fine):: echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \ /etc/pvfs2tab -- cgit v1.2.3 From 2ad9a844fc83282d7f4e71baa82ffa2c0ae88c63 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 24 Apr 2020 17:26:37 +0200 Subject: docs: virt/kvm: close inline string literal This fixes Documentation/virt/kvm/amd-memory-encryption.rst:76: WARNING: Inline literal start-string without end-string. Fixes: 2da1ed62d55c ("KVM: SVM: document KVM_MEM_ENCRYPT_OP, let userspace detect if SEV is available") Signed-off-by: Stephen Kitt Link: https://lore.kernel.org/r/20200424152637.120876-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- Documentation/virt/kvm/amd-memory-encryption.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index c3129b9ba5cb..57c01f531e61 100644 --- a/Documentation/virt/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst @@ -74,7 +74,7 @@ should point to a file descriptor that is opened on the ``/dev/sev`` device, if needed (see individual commands). On output, ``error`` is zero on success, or an error code. Error codes -are defined in ```. +are defined in ````. 
KVM implements the following commands to support common lifecycle events of SEV guests, such as launching, running, snapshotting, migrating and decommissioning. -- cgit v1.2.3 From 6bc47621cbf357a9b7bab3890489804f38916a35 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Thu, 23 Apr 2020 20:36:49 +0200 Subject: docs: sysctl/kernel: document cad_pid Based on the implementation in kernel/sysctl.c (the proc_do_cad_pid() function), kernel/reboot.c, and include/linux/sched/signal.h. Signed-off-by: Stephen Kitt Link: https://lore.kernel.org/r/20200423183651.15365-1-steve@sk2.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 55b24eada13c..82bfd5892663 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -116,6 +116,16 @@ run. The statistics can be seen using ``bpftool``. = =================================== +cad_pid +======= + +This is the pid which will be signalled on reboot (notably, by +Ctrl-Alt-Delete). Writing a value to this file which doesn't +correspond to a running process will result in ``-ESRCH``. + +See also `ctrl-alt-del`_. + + cap_last_cap ============ -- cgit v1.2.3 From 1f5ea8720e8db71d768533bf61773ecdbf9fdcc2 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Wed, 15 Apr 2020 23:16:50 +0200 Subject: doc: nvdimm: remove reference to non-existent CONFIG_NFIT_TEST The test driver is in tools/testing/nvdimm and cannot be selected by a config option. 
Signed-off-by: Michal Suchanek Link: https://lore.kernel.org/r/20200415211654.10827-1-msuchanek@suse.de Signed-off-by: Jonathan Corbet --- Documentation/driver-api/nvdimm/nvdimm.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst index 08f855cbb4e6..79c0fd39f2af 100644 --- a/Documentation/driver-api/nvdimm/nvdimm.rst +++ b/Documentation/driver-api/nvdimm/nvdimm.rst @@ -278,8 +278,8 @@ by a region device with a dynamically assigned id (REGION0 - REGION5). be contiguous in DPA-space. This bus is provided by the kernel under the device - /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and - the nfit_test.ko module is loaded. This not only test LIBNVDIMM but the + /sys/devices/platform/nfit_test.0 when the nfit_test.ko module from + tools/testing/nvdimm is loaded. This not only test LIBNVDIMM but the acpi_nfit.ko driver as well. -- cgit v1.2.3 From a8b380c379efcade6008cb5e2359073b72e73ad8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 21 Apr 2020 16:31:05 +0200 Subject: scripts: sphinx-pre-install: only ask to activate valid venvs If a venv doesn't contain Sphinx, or has an older Sphinx version, ignore it. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/d11a00d88514e8a0357e1b0a05ebd518952a1d39.1587478901.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 53 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index d4dfe1e59989..249edb3932af 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -231,6 +231,27 @@ sub get_sphinx_fname() return ""; } +sub get_sphinx_version($) +{ + my $cmd = shift; + my $ver; + + open IN, "$cmd --version 2>&1 |"; + while () { + if (m/^\s*sphinx-build\s+([\d\.]+)(\+\/[\da-f]+)?$/) { + $ver=$1; + last; + } + # Sphinx 1.2.x uses a different format + if (m/^\s*Sphinx.*\s+([\d\.]+)$/) { + $ver=$1; + last; + } + } + close IN; + return $ver; +} + sub check_sphinx() { my $rec_version; @@ -266,19 +287,8 @@ sub check_sphinx() return; } - open IN, "$sphinx --version 2>&1 |" or die "$sphinx returned an error"; - while () { - if (m/^\s*sphinx-build\s+([\d\.]+)(\+\/[\da-f]+)?$/) { - $cur_version=$1; - last; - } - # Sphinx 1.2.x uses a different format - if (m/^\s*Sphinx.*\s+([\d\.]+)$/) { - $cur_version=$1; - last; - } - } - close IN; + $cur_version = get_sphinx_version($sphinx); + die ("$sphinx returned an error") if (!$cur_version); die "$sphinx didn't return its version" if (!$cur_version); @@ -765,10 +775,21 @@ sub check_needs() my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate"; @activates = sort {$b cmp $a} @activates; + my ($activate, $ver); + foreach my $f (@activates) { + $activate = $f; + next if ($activate lt $min_activate); + + my $sphinx_cmd = $activate; + $sphinx_cmd =~ s/activate/sphinx-build/; + next if (! -f $sphinx_cmd); - if ($need_sphinx && scalar @activates > 0 && $activates[0] ge $min_activate) { - printf "\nNeed to activate a compatible Sphinx version on virtualenv with:\n"; - printf "\t. 
$activates[0]\n"; + $ver = get_sphinx_version($sphinx_cmd); + last if ($ver ge $min_version); + } + if ($need_sphinx && ($activate ne "")) { + printf "\nNeed to activate Sphinx (version $ver) on virtualenv with:\n"; + printf "\t. $activate\n"; deactivate_help(); exit (1); } else { -- cgit v1.2.3 From 1ef70ced55976368275c468dfd436881d5580111 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 21 Apr 2020 16:31:06 +0200 Subject: scripts: sphinx-pre-install: change the warning for version < 2.4.4 As requested by Jon, change the version check, in order to not emit a warning if version is >= 1.7.9, but below 2.4.4. After this patch, if someone used an older version, it will say: ./scripts/sphinx-pre-install Sphinx version 1.7.9 Note: It is recommended at least Sphinx version 2.4.4 if you need PDF support. Detected OS: Fedora release 31 (Thirty One). To upgrade Sphinx, use: /devel/v4l/docs/sphinx_1.7.9/bin/python3 -m venv sphinx_2.4.4 . sphinx_2.4.4/bin/activate pip install -r ./Documentation/sphinx/requirements.txt If you want to exit the virtualenv, you can use: deactivate All optional dependencies are met. Needed package dependencies are met. 
If Sphinx is not detected at all, it Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/79584d317ba16f5d4f37801c5ee57cf04085f962.1587478901.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 249edb3932af..0d5684c08bbc 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -29,6 +29,8 @@ my $install = ""; my $virtenv_dir = ""; my $python_cmd = ""; my $min_version; +my $rec_version = "1.7.9"; # PDF won't build here +my $min_pdf_version = "2.4.4"; # Min version where pdf builds # # Command line arguments @@ -254,7 +256,7 @@ sub get_sphinx_version($) sub check_sphinx() { - my $rec_version; + my $default_version; my $cur_version; open IN, $conf or die "Can't open $conf"; @@ -271,15 +273,15 @@ sub check_sphinx() open IN, $requirement_file or die "Can't open $requirement_file"; while () { if (m/^\s*Sphinx\s*==\s*([\d\.]+)$/) { - $rec_version=$1; + $default_version=$1; last; } } close IN; - die "Can't get recommended sphinx version from $requirement_file" if (!$min_version); + die "Can't get default sphinx version from $requirement_file" if (!$default_version); - $virtenv_dir = $virtenv_prefix . $rec_version; + $virtenv_dir = $virtenv_prefix . $default_version; my $sphinx = get_sphinx_fname(); if ($sphinx eq "") { @@ -294,7 +296,7 @@ sub check_sphinx() if ($cur_version lt $min_version) { printf "ERROR: Sphinx version is %s. 
It should be >= %s (recommended >= %s)\n", - $cur_version, $min_version, $rec_version;; + $cur_version, $min_version, $default_version; $need_sphinx = 1; return; } @@ -302,6 +304,13 @@ sub check_sphinx() if ($cur_version lt $rec_version) { printf "Sphinx version %s\n", $cur_version; print "Warning: It is recommended at least Sphinx version $rec_version.\n"; + print " If you want pdf, you need at least $min_pdf_version.\n"; + $rec_sphinx_upgrade = 1; + return; + } + if ($cur_version lt $min_pdf_version) { + printf "Sphinx version %s\n", $cur_version; + print "Note: It is recommended at least Sphinx version $min_pdf_version if you need PDF support.\n"; $rec_sphinx_upgrade = 1; return; } -- cgit v1.2.3 From 2834a7412bb1a74722231d7198518f9456279156 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 21 Apr 2020 16:31:07 +0200 Subject: scripts: sphinx-pre-install: change recommendation text if venv exists If one is running a Sphinx version older than what's recommended, but there's already a newer working virtual env, change the text, as it is just a matter of switching to the new venv, instead of creating a new one from scratch. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/bcf79d0399a1c3444ca938dcdce599c3273980ab.1587478901.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 0d5684c08bbc..938b65d40fc8 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -29,6 +29,7 @@ my $install = ""; my $virtenv_dir = ""; my $python_cmd = ""; my $min_version; +my $cur_version; my $rec_version = "1.7.9"; # PDF won't build here my $min_pdf_version = "2.4.4"; # Min version where pdf builds @@ -257,7 +258,6 @@ sub get_sphinx_version($) sub check_sphinx() { my $default_version; - my $cur_version; open IN, $conf or die "Can't open $conf"; while () { @@ -703,8 +703,6 @@ sub check_needs() print "Unknown OS\n\n"; } - print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade); - # Check python command line, trying first python3 $python_cmd = findprog("python3"); $python_cmd = check_program("python", 0) if (!$python_cmd); @@ -786,24 +784,36 @@ sub check_needs() @activates = sort {$b cmp $a} @activates; my ($activate, $ver); foreach my $f (@activates) { - $activate = $f; - next if ($activate lt $min_activate); + next if ($f lt $min_activate); - my $sphinx_cmd = $activate; + my $sphinx_cmd = $f; $sphinx_cmd =~ s/activate/sphinx-build/; next if (! -f $sphinx_cmd); $ver = get_sphinx_version($sphinx_cmd); - last if ($ver ge $min_version); + if ($need_sphinx && ($ver ge $min_version)) { + $activate = $f; + last; + } elsif ($ver gt $cur_version) { + $activate = $f; + last; + } } - if ($need_sphinx && ($activate ne "")) { - printf "\nNeed to activate Sphinx (version $ver) on virtualenv with:\n"; - printf "\t. 
$activate\n"; - deactivate_help(); - exit (1); + if ($activate ne "") { + if ($need_sphinx) { + printf "\nNeed to activate Sphinx (version $ver) on virtualenv with:\n"; + printf "\t. $activate\n"; + deactivate_help(); + exit (1); + } else { + printf "\nYou may also use a newer Sphinx (version $ver) with:\n"; + printf "\tdeactivate && . $activate\n"; + } } else { my $rec_activate = "$virtenv_dir/bin/activate"; + print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade); + if ($need_venv) { printf "\t$python_cmd -m venv $virtenv_dir\n"; } else { -- cgit v1.2.3 From 412b09ddadd34e9be0d9a26e6de252361fafc237 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 21 Apr 2020 16:31:08 +0200 Subject: scripts: sphinx-pre-install: fix a bug when using with venv When python3 creates a venv, it adds python into it! This causes any upgrade recommendation to look like this: /devel/v4l/docs/sphinx_1.7.9/bin/python3 -m venv sphinx_2.4.4 . sphinx_2.4.4/bin/activate pip install -r ./Documentation/sphinx/requirements.txt With is wrong (and it may not work). So, when recomending an upgrade, exclude the venv dir from the search path, and get the system's python. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/aa622ff71bebf6960fc0262fb90e7ebc7a999a02.1587478901.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 938b65d40fc8..987aebf7e3a0 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -148,6 +148,24 @@ sub findprog($) } } +sub find_python_no_venv() +{ + my $prog = shift; + + my $cur_dir = qx(pwd); + $cur_dir =~ s/\s+$//; + + foreach my $dir (split(/:/, $ENV{PATH})) { + next if ($dir =~ m,($cur_dir)/sphinx,); + return "$dir/python3" if(-x "$dir/python3"); + } + foreach my $dir (split(/:/, $ENV{PATH})) { + next if ($dir =~ m,($cur_dir)/sphinx,); + return "$dir/python" if(-x "$dir/python"); + } + return "python"; +} + sub check_program($$) { my $prog = shift; @@ -814,6 +832,8 @@ sub check_needs() print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade); + $python_cmd = find_python_no_venv(); + if ($need_venv) { printf "\t$python_cmd -m venv $virtenv_dir\n"; } else { -- cgit v1.2.3 From ec43a27fffd025f718428d9f4f8df22fa765405f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 21 Apr 2020 18:27:58 +0200 Subject: scripts: sphinx-pre-install: change the output order When the script detects the need for an upgrade, it will print either a warning or a note. Let's change a little bit the order where messages will be displayed, in order to make easier for the user to identify the more important messages. It should now be like this: Detected OS: Fedora release 31 (Thirty One). Sphinx version: 1.7.9 Note: It is recommended at least Sphinx version 2.4.4 if you need PDF support. To upgrade Sphinx, use: /usr/bin/python3 -m venv sphinx_2.4.4 . 
sphinx_2.4.4/bin/activate pip install -r ./Documentation/sphinx/requirements.txt If you want to exit the virtualenv, you can use: deactivate All optional dependencies are met. Needed package dependencies are met. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20200421182758.04e0a53e@coco.lan Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 987aebf7e3a0..c680c3efb176 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -320,15 +320,10 @@ sub check_sphinx() } if ($cur_version lt $rec_version) { - printf "Sphinx version %s\n", $cur_version; - print "Warning: It is recommended at least Sphinx version $rec_version.\n"; - print " If you want pdf, you need at least $min_pdf_version.\n"; $rec_sphinx_upgrade = 1; return; } if ($cur_version lt $min_pdf_version) { - printf "Sphinx version %s\n", $cur_version; - print "Note: It is recommended at least Sphinx version $min_pdf_version if you need PDF support.\n"; $rec_sphinx_upgrade = 1; return; } @@ -716,10 +711,11 @@ sub check_needs() check_sphinx(); if ($system_release) { - print "Detected OS: $system_release.\n\n"; + print "Detected OS: $system_release.\n"; } else { - print "Unknown OS\n\n"; + print "Unknown OS\n"; } + printf "Sphinx version: %s\n\n", $cur_version if ($cur_version); # Check python command line, trying first python3 $python_cmd = findprog("python3"); @@ -799,6 +795,13 @@ sub check_needs() my $min_activate = "$ENV{'PWD'}/${virtenv_prefix}${min_version}/bin/activate"; my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate"; + if ($cur_version lt $rec_version) { + print "Warning: It is recommended at least Sphinx version $rec_version.\n"; + print " If you want pdf, you need at least $min_pdf_version.\n"; + } + if ($cur_version lt $min_pdf_version) { + print "Note: It is recommended at least Sphinx 
version $min_pdf_version if you need PDF support.\n"; + } @activates = sort {$b cmp $a} @activates; my ($activate, $ver); foreach my $f (@activates) { -- cgit v1.2.3 From 67145c23e70b066443a3c0736e74aa2342a013fd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:53 +0200 Subject: docs: filesystems: convert caching/object.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Comment out text ToC for html/pdf output; - Some whitespace fixes and new line breaks; - Adjust the events list to make them look better for html output; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/49026a8ea7e714c2e0f003aa26b975b1025476b7.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/caching/fscache.txt | 2 +- Documentation/filesystems/caching/index.rst | 9 + Documentation/filesystems/caching/object.rst | 313 +++++++++++++++++++++++++ Documentation/filesystems/caching/object.txt | 320 -------------------------- Documentation/filesystems/index.rst | 2 + fs/fscache/object.c | 2 +- 6 files changed, 326 insertions(+), 322 deletions(-) create mode 100644 Documentation/filesystems/caching/index.rst create mode 100644 Documentation/filesystems/caching/object.rst delete mode 100644 Documentation/filesystems/caching/object.txt diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index 50f0a5757f48..071ff50a774d 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -191,7 +191,7 @@ The cache backend API to FS-Cache can be found in: A description of the internal representations and object state machine can be found in: - Documentation/filesystems/caching/object.txt + Documentation/filesystems/caching/object.rst ======================= diff --git a/Documentation/filesystems/caching/index.rst 
b/Documentation/filesystems/caching/index.rst new file mode 100644 index 000000000000..e5ec95ff0be2 --- /dev/null +++ b/Documentation/filesystems/caching/index.rst @@ -0,0 +1,9 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Filesystem Caching +================== + +.. toctree:: + :maxdepth: 2 + + object diff --git a/Documentation/filesystems/caching/object.rst b/Documentation/filesystems/caching/object.rst new file mode 100644 index 000000000000..ce0e043ccd33 --- /dev/null +++ b/Documentation/filesystems/caching/object.rst @@ -0,0 +1,313 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================================================== +In-Kernel Cache Object Representation and Management +==================================================== + +By: David Howells + +.. Contents: + + (*) Representation + + (*) Object management state machine. + + - Provision of cpu time. + - Locking simplification. + + (*) The set of states. + + (*) The set of events. + + +Representation +============== + +FS-Cache maintains an in-kernel representation of each object that a netfs is +currently interested in. Such objects are represented by the fscache_cookie +struct and are referred to as cookies. + +FS-Cache also maintains a separate in-kernel representation of the objects that +a cache backend is currently actively caching. Such objects are represented by +the fscache_object struct. The cache backends allocate these upon request, and +are expected to embed them in their own representations. These are referred to +as objects. + +There is a 1:N relationship between cookies and objects. A cookie may be +represented by multiple objects - an index may exist in more than one cache - +or even by no objects (it may not be cached). + +Furthermore, both cookies and objects are hierarchical. 
The two hierarchies +correspond, but the cookies tree is a superset of the union of the object trees +of multiple caches:: + + NETFS INDEX TREE : CACHE 1 : CACHE 2 + : : + : +-----------+ : + +----------->| IObject | : + +-----------+ | : +-----------+ : + | ICookie |-------+ : | : + +-----------+ | : | : +-----------+ + | +------------------------------>| IObject | + | : | : +-----------+ + | : V : | + | : +-----------+ : | + V +----------->| IObject | : | + +-----------+ | : +-----------+ : | + | ICookie |-------+ : | : V + +-----------+ | : | : +-----------+ + | +------------------------------>| IObject | + +-----+-----+ : | : +-----------+ + | | : | : | + V | : V : | + +-----------+ | : +-----------+ : | + | ICookie |------------------------->| IObject | : | + +-----------+ | : +-----------+ : | + | V : | : V + | +-----------+ : | : +-----------+ + | | ICookie |-------------------------------->| IObject | + | +-----------+ : | : +-----------+ + V | : V : | + +-----------+ | : +-----------+ : | + | DCookie |------------------------->| DObject | : | + +-----------+ | : +-----------+ : | + | : : | + +-------+-------+ : : | + | | : : | + V V : : V + +-----------+ +-----------+ : : +-----------+ + | DCookie | | DCookie |------------------------>| DObject | + +-----------+ +-----------+ : : +-----------+ + : : + +In the above illustration, ICookie and IObject represent indices and DCookie +and DObject represent data storage objects. Indices may have representation in +multiple caches, but currently, non-index objects may not. Objects of any type +may also be entirely unrepresented. + +As far as the netfs API goes, the netfs is only actually permitted to see +pointers to the cookies. The cookies themselves and any objects attached to +those cookies are hidden from it. + + +Object Management State Machine +=============================== + +Within FS-Cache, each active object is managed by its own individual state +machine. 
The state for an object is kept in the fscache_object struct, in +object->state. A cookie may point to a set of objects that are in different +states. + +Each state has an action associated with it that is invoked when the machine +wakes up in that state. There are four logical sets of states: + + (1) Preparation: states that wait for the parent objects to become ready. The + representations are hierarchical, and it is expected that an object must + be created or accessed with respect to its parent object. + + (2) Initialisation: states that perform lookups in the cache and validate + what's found and that create on disk any missing metadata. + + (3) Normal running: states that allow netfs operations on objects to proceed + and that update the state of objects. + + (4) Termination: states that detach objects from their netfs cookies, that + delete objects from disk, that handle disk and system errors and that free + up in-memory resources. + + +In most cases, transitioning between states is in response to signalled events. +When a state has finished processing, it will usually set the mask of events in +which it is interested (object->event_mask) and relinquish the worker thread. +Then when an event is raised (by calling fscache_raise_event()), if the event +is not masked, the object will be queued for processing (by calling +fscache_enqueue_object()). + + +Provision of CPU Time +--------------------- + +The work to be done by the various states was given CPU time by the threads of +the slow work facility. This was used in preference to the workqueue facility +because: + + (1) Threads may be completely occupied for very long periods of time by a + particular work item. These state actions may be doing sequences of + synchronous, journalled disk accesses (lookup, mkdir, create, setxattr, + getxattr, truncate, unlink, rmdir, rename). + + (2) Threads may do little actual work, but may rather spend a lot of time + sleeping on I/O. 
This means that single-threaded and 1-per-CPU-threaded + workqueues don't necessarily have the right numbers of threads. + + +Locking Simplification +---------------------- + +Because only one worker thread may be operating on any particular object's +state machine at once, this simplifies the locking, particularly with respect +to disconnecting the netfs's representation of a cache object (fscache_cookie) +from the cache backend's representation (fscache_object) - which may be +requested from either end. + + +The Set of States +================= + +The object state machine has a set of states that it can be in. There are +preparation states in which the object sets itself up and waits for its parent +object to transit to a state that allows access to its children: + + (1) State FSCACHE_OBJECT_INIT. + + Initialise the object and wait for the parent object to become active. In + the cache, it is expected that it will not be possible to look an object + up from the parent object, until that parent object itself has been looked + up. + +There are initialisation states in which the object sets itself up and accesses +disk for the object metadata: + + (2) State FSCACHE_OBJECT_LOOKING_UP. + + Look up the object on disk, using the parent as a starting point. + FS-Cache expects the cache backend to probe the cache to see whether this + object is represented there, and if it is, to see if it's valid (coherency + management). + + The cache should call fscache_object_lookup_negative() to indicate lookup + failure for whatever reason, and should call fscache_obtained_object() to + indicate success. + + At the completion of lookup, FS-Cache will let the netfs go ahead with + read operations, no matter whether the file is yet cached. 
If not yet + cached, read operations will be immediately rejected with ENODATA until + the first known page is uncached - as to that point there can be no data + to be read out of the cache for that file that isn't currently also held + in the pagecache. + + (3) State FSCACHE_OBJECT_CREATING. + + Create an object on disk, using the parent as a starting point. This + happens if the lookup failed to find the object, or if the object's + coherency data indicated what's on disk is out of date. In this state, + FS-Cache expects the cache to create + + The cache should call fscache_obtained_object() if creation completes + successfully, fscache_object_lookup_negative() otherwise. + + At the completion of creation, FS-Cache will start processing write + operations the netfs has queued for an object. If creation failed, the + write ops will be transparently discarded, and nothing recorded in the + cache. + +There are some normal running states in which the object spends its time +servicing netfs requests: + + (4) State FSCACHE_OBJECT_AVAILABLE. + + A transient state in which pending operations are started, child objects + are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary + lookup data is freed. + + (5) State FSCACHE_OBJECT_ACTIVE. + + The normal running state. In this state, requests the netfs makes will be + passed on to the cache. + + (6) State FSCACHE_OBJECT_INVALIDATING. + + The object is undergoing invalidation. When the state comes here, it + discards all pending read, write and attribute change operations as it is + going to clear out the cache entirely and reinitialise it. It will then + continue to the FSCACHE_OBJECT_UPDATING state. + + (7) State FSCACHE_OBJECT_UPDATING. + + The state machine comes here to update the object in the cache from the + netfs's records. This involves updating the auxiliary data that is used + to maintain coherency. 
+ +And there are terminal states in which an object cleans itself up, deallocates +memory and potentially deletes stuff from disk: + + (8) State FSCACHE_OBJECT_LC_DYING. + + The object comes here if it is dying because of a lookup or creation + error. This would be due to a disk error or system error of some sort. + Temporary data is cleaned up, and the parent is released. + + (9) State FSCACHE_OBJECT_DYING. + + The object comes here if it is dying due to an error, because its parent + cookie has been relinquished by the netfs or because the cache is being + withdrawn. + + Any child objects waiting on this one are given CPU time so that they too + can destroy themselves. This object waits for all its children to go away + before advancing to the next state. + +(10) State FSCACHE_OBJECT_ABORT_INIT. + + The object comes to this state if it was waiting on its parent in + FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself + so that the parent may proceed from the FSCACHE_OBJECT_DYING state. + +(11) State FSCACHE_OBJECT_RELEASING. +(12) State FSCACHE_OBJECT_RECYCLING. + + The object comes to one of these two states when dying once it is rid of + all its children, if it is dying because the netfs relinquished its + cookie. In the first state, the cached data is expected to persist, and + in the second it will be deleted. + +(13) State FSCACHE_OBJECT_WITHDRAWING. + + The object transits to this state if the cache decides it wants to + withdraw the object from service, perhaps to make space, but also due to + error or just because the whole cache is being withdrawn. + +(14) State FSCACHE_OBJECT_DEAD. + + The object transits to this state when the in-memory object record is + ready to be deleted. The object processor shouldn't ever see an object in + this state. 
+ + +The Set of Events +----------------- + +There are a number of events that can be raised to an object state machine: + + FSCACHE_OBJECT_EV_UPDATE + The netfs requested that an object be updated. The state machine will ask + the cache backend to update the object, and the cache backend will ask the + netfs for details of the change through its cookie definition ops. + + FSCACHE_OBJECT_EV_CLEARED + This is signalled in two circumstances: + + (a) when an object's last child object is dropped and + + (b) when the last operation outstanding on an object is completed. + + This is used to proceed from the dying state. + + FSCACHE_OBJECT_EV_ERROR + This is signalled when an I/O error occurs during the processing of some + object. + + FSCACHE_OBJECT_EV_RELEASE, FSCACHE_OBJECT_EV_RETIRE + These are signalled when the netfs relinquishes a cookie it was using. + The event selected depends on whether the netfs asks for the backing + object to be retired (deleted) or retained. + + FSCACHE_OBJECT_EV_WITHDRAW + This is signalled when the cache backend wants to withdraw an object. + This means that the object will have to be detached from the netfs's + cookie. + +Because the withdrawing releasing/retiring events are all handled by the object +state machine, it doesn't matter if there's a collision with both ends trying +to sever the connection at the same time. The state machine can just pick +which one it wants to honour, and that effects the other. diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt deleted file mode 100644 index 100ff41127e4..000000000000 --- a/Documentation/filesystems/caching/object.txt +++ /dev/null @@ -1,320 +0,0 @@ - ==================================================== - IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT - ==================================================== - -By: David Howells - -Contents: - - (*) Representation - - (*) Object management state machine. - - - Provision of cpu time. 
- - Locking simplification. - - (*) The set of states. - - (*) The set of events. - - -============== -REPRESENTATION -============== - -FS-Cache maintains an in-kernel representation of each object that a netfs is -currently interested in. Such objects are represented by the fscache_cookie -struct and are referred to as cookies. - -FS-Cache also maintains a separate in-kernel representation of the objects that -a cache backend is currently actively caching. Such objects are represented by -the fscache_object struct. The cache backends allocate these upon request, and -are expected to embed them in their own representations. These are referred to -as objects. - -There is a 1:N relationship between cookies and objects. A cookie may be -represented by multiple objects - an index may exist in more than one cache - -or even by no objects (it may not be cached). - -Furthermore, both cookies and objects are hierarchical. The two hierarchies -correspond, but the cookies tree is a superset of the union of the object trees -of multiple caches: - - NETFS INDEX TREE : CACHE 1 : CACHE 2 - : : - : +-----------+ : - +----------->| IObject | : - +-----------+ | : +-----------+ : - | ICookie |-------+ : | : - +-----------+ | : | : +-----------+ - | +------------------------------>| IObject | - | : | : +-----------+ - | : V : | - | : +-----------+ : | - V +----------->| IObject | : | - +-----------+ | : +-----------+ : | - | ICookie |-------+ : | : V - +-----------+ | : | : +-----------+ - | +------------------------------>| IObject | - +-----+-----+ : | : +-----------+ - | | : | : | - V | : V : | - +-----------+ | : +-----------+ : | - | ICookie |------------------------->| IObject | : | - +-----------+ | : +-----------+ : | - | V : | : V - | +-----------+ : | : +-----------+ - | | ICookie |-------------------------------->| IObject | - | +-----------+ : | : +-----------+ - V | : V : | - +-----------+ | : +-----------+ : | - | DCookie |------------------------->| DObject | : | - 
+-----------+ | : +-----------+ : | - | : : | - +-------+-------+ : : | - | | : : | - V V : : V - +-----------+ +-----------+ : : +-----------+ - | DCookie | | DCookie |------------------------>| DObject | - +-----------+ +-----------+ : : +-----------+ - : : - -In the above illustration, ICookie and IObject represent indices and DCookie -and DObject represent data storage objects. Indices may have representation in -multiple caches, but currently, non-index objects may not. Objects of any type -may also be entirely unrepresented. - -As far as the netfs API goes, the netfs is only actually permitted to see -pointers to the cookies. The cookies themselves and any objects attached to -those cookies are hidden from it. - - -=============================== -OBJECT MANAGEMENT STATE MACHINE -=============================== - -Within FS-Cache, each active object is managed by its own individual state -machine. The state for an object is kept in the fscache_object struct, in -object->state. A cookie may point to a set of objects that are in different -states. - -Each state has an action associated with it that is invoked when the machine -wakes up in that state. There are four logical sets of states: - - (1) Preparation: states that wait for the parent objects to become ready. The - representations are hierarchical, and it is expected that an object must - be created or accessed with respect to its parent object. - - (2) Initialisation: states that perform lookups in the cache and validate - what's found and that create on disk any missing metadata. - - (3) Normal running: states that allow netfs operations on objects to proceed - and that update the state of objects. - - (4) Termination: states that detach objects from their netfs cookies, that - delete objects from disk, that handle disk and system errors and that free - up in-memory resources. - - -In most cases, transitioning between states is in response to signalled events. 
-When a state has finished processing, it will usually set the mask of events in -which it is interested (object->event_mask) and relinquish the worker thread. -Then when an event is raised (by calling fscache_raise_event()), if the event -is not masked, the object will be queued for processing (by calling -fscache_enqueue_object()). - - -PROVISION OF CPU TIME ---------------------- - -The work to be done by the various states was given CPU time by the threads of -the slow work facility. This was used in preference to the workqueue facility -because: - - (1) Threads may be completely occupied for very long periods of time by a - particular work item. These state actions may be doing sequences of - synchronous, journalled disk accesses (lookup, mkdir, create, setxattr, - getxattr, truncate, unlink, rmdir, rename). - - (2) Threads may do little actual work, but may rather spend a lot of time - sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded - workqueues don't necessarily have the right numbers of threads. - - -LOCKING SIMPLIFICATION ----------------------- - -Because only one worker thread may be operating on any particular object's -state machine at once, this simplifies the locking, particularly with respect -to disconnecting the netfs's representation of a cache object (fscache_cookie) -from the cache backend's representation (fscache_object) - which may be -requested from either end. - - -================= -THE SET OF STATES -================= - -The object state machine has a set of states that it can be in. There are -preparation states in which the object sets itself up and waits for its parent -object to transit to a state that allows access to its children: - - (1) State FSCACHE_OBJECT_INIT. - - Initialise the object and wait for the parent object to become active. In - the cache, it is expected that it will not be possible to look an object - up from the parent object, until that parent object itself has been looked - up. 
- -There are initialisation states in which the object sets itself up and accesses -disk for the object metadata: - - (2) State FSCACHE_OBJECT_LOOKING_UP. - - Look up the object on disk, using the parent as a starting point. - FS-Cache expects the cache backend to probe the cache to see whether this - object is represented there, and if it is, to see if it's valid (coherency - management). - - The cache should call fscache_object_lookup_negative() to indicate lookup - failure for whatever reason, and should call fscache_obtained_object() to - indicate success. - - At the completion of lookup, FS-Cache will let the netfs go ahead with - read operations, no matter whether the file is yet cached. If not yet - cached, read operations will be immediately rejected with ENODATA until - the first known page is uncached - as to that point there can be no data - to be read out of the cache for that file that isn't currently also held - in the pagecache. - - (3) State FSCACHE_OBJECT_CREATING. - - Create an object on disk, using the parent as a starting point. This - happens if the lookup failed to find the object, or if the object's - coherency data indicated what's on disk is out of date. In this state, - FS-Cache expects the cache to create - - The cache should call fscache_obtained_object() if creation completes - successfully, fscache_object_lookup_negative() otherwise. - - At the completion of creation, FS-Cache will start processing write - operations the netfs has queued for an object. If creation failed, the - write ops will be transparently discarded, and nothing recorded in the - cache. - -There are some normal running states in which the object spends its time -servicing netfs requests: - - (4) State FSCACHE_OBJECT_AVAILABLE. - - A transient state in which pending operations are started, child objects - are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary - lookup data is freed. - - (5) State FSCACHE_OBJECT_ACTIVE. - - The normal running state. 
In this state, requests the netfs makes will be - passed on to the cache. - - (6) State FSCACHE_OBJECT_INVALIDATING. - - The object is undergoing invalidation. When the state comes here, it - discards all pending read, write and attribute change operations as it is - going to clear out the cache entirely and reinitialise it. It will then - continue to the FSCACHE_OBJECT_UPDATING state. - - (7) State FSCACHE_OBJECT_UPDATING. - - The state machine comes here to update the object in the cache from the - netfs's records. This involves updating the auxiliary data that is used - to maintain coherency. - -And there are terminal states in which an object cleans itself up, deallocates -memory and potentially deletes stuff from disk: - - (8) State FSCACHE_OBJECT_LC_DYING. - - The object comes here if it is dying because of a lookup or creation - error. This would be due to a disk error or system error of some sort. - Temporary data is cleaned up, and the parent is released. - - (9) State FSCACHE_OBJECT_DYING. - - The object comes here if it is dying due to an error, because its parent - cookie has been relinquished by the netfs or because the cache is being - withdrawn. - - Any child objects waiting on this one are given CPU time so that they too - can destroy themselves. This object waits for all its children to go away - before advancing to the next state. - -(10) State FSCACHE_OBJECT_ABORT_INIT. - - The object comes to this state if it was waiting on its parent in - FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself - so that the parent may proceed from the FSCACHE_OBJECT_DYING state. - -(11) State FSCACHE_OBJECT_RELEASING. -(12) State FSCACHE_OBJECT_RECYCLING. - - The object comes to one of these two states when dying once it is rid of - all its children, if it is dying because the netfs relinquished its - cookie. In the first state, the cached data is expected to persist, and - in the second it will be deleted. 
- -(13) State FSCACHE_OBJECT_WITHDRAWING. - - The object transits to this state if the cache decides it wants to - withdraw the object from service, perhaps to make space, but also due to - error or just because the whole cache is being withdrawn. - -(14) State FSCACHE_OBJECT_DEAD. - - The object transits to this state when the in-memory object record is - ready to be deleted. The object processor shouldn't ever see an object in - this state. - - -THE SET OF EVENTS ------------------ - -There are a number of events that can be raised to an object state machine: - - (*) FSCACHE_OBJECT_EV_UPDATE - - The netfs requested that an object be updated. The state machine will ask - the cache backend to update the object, and the cache backend will ask the - netfs for details of the change through its cookie definition ops. - - (*) FSCACHE_OBJECT_EV_CLEARED - - This is signalled in two circumstances: - - (a) when an object's last child object is dropped and - - (b) when the last operation outstanding on an object is completed. - - This is used to proceed from the dying state. - - (*) FSCACHE_OBJECT_EV_ERROR - - This is signalled when an I/O error occurs during the processing of some - object. - - (*) FSCACHE_OBJECT_EV_RELEASE - (*) FSCACHE_OBJECT_EV_RETIRE - - These are signalled when the netfs relinquishes a cookie it was using. - The event selected depends on whether the netfs asks for the backing - object to be retired (deleted) or retained. - - (*) FSCACHE_OBJECT_EV_WITHDRAW - - This is signalled when the cache backend wants to withdraw an object. - This means that the object will have to be detached from the netfs's - cookie. - -Because the withdrawing releasing/retiring events are all handled by the object -state machine, it doesn't matter if there's a collision with both ends trying -to sever the connection at the same time. The state machine can just pick -which one it wants to honour, and that effects the other. 
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index e7b46dac7079..773af390c5e5 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -25,6 +25,8 @@ algorithms work. locking directory-locking + caching/index + porting Filesystem support layers diff --git a/fs/fscache/object.c b/fs/fscache/object.c index cfeba839a0f2..efaa003b8323 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -4,7 +4,7 @@ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * See Documentation/filesystems/caching/object.txt for a description of the + * See Documentation/filesystems/caching/object.rst for a description of the * object state machine and the in-kernel representations. */ -- cgit v1.2.3 From fd299b2a7339ad0b8e2050189e20619cd817ba7e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:54 +0200 Subject: docs: filesystems: convert caching/fscache.txt to ReST format - Add a SPDX header; - Adjust document and section titles; - Comment out text ToC for html/pdf output; - Some whitespace fixes and new line breaks; - Add table markups; - Add it to filesystems/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e33ec382a53cf10ffcbd802f6de3f384159cddba.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/caching/fscache.rst | 565 ++++++++++++++++++++++++++ Documentation/filesystems/caching/fscache.txt | 448 -------------------- Documentation/filesystems/caching/index.rst | 1 + fs/fscache/Kconfig | 8 +- 4 files changed, 570 insertions(+), 452 deletions(-) create mode 100644 Documentation/filesystems/caching/fscache.rst delete mode 100644 Documentation/filesystems/caching/fscache.txt diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst new file mode 100644 index 000000000000..950602147aa5 --- /dev/null +++ b/Documentation/filesystems/caching/fscache.rst @@ -0,0 +1,565 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +General Filesystem Caching +========================== + +Overview +======== + +This facility is a general purpose cache for network filesystems, though it +could be used for caching other things such as ISO9660 filesystems too. 
+ +FS-Cache mediates between cache backends (such as CacheFS) and network +filesystems:: + + +---------+ + | | +--------------+ + | NFS |--+ | | + | | | +-->| CacheFS | + +---------+ | +----------+ | | /dev/hda5 | + | | | | +--------------+ + +---------+ +-->| | | + | | | |--+ + | AFS |----->| FS-Cache | + | | | |--+ + +---------+ +-->| | | + | | | | +--------------+ + +---------+ | +----------+ | | | + | | | +-->| CacheFiles | + | ISOFS |--+ | /var/cache | + | | +--------------+ + +---------+ + +Or to look at it another way, FS-Cache is a module that provides a caching +facility to a network filesystem such that the cache is transparent to the +user:: + + +---------+ + | | + | Server | + | | + +---------+ + | NETWORK + ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | + | +----------+ + V | | + +---------+ | | + | | | | + | NFS |----->| FS-Cache | + | | | |--+ + +---------+ | | | +--------------+ +--------------+ + | | | | | | | | + V +----------+ +-->| CacheFiles |-->| Ext3 | + +---------+ | /var/cache | | /dev/sda6 | + | | +--------------+ +--------------+ + | VFS | ^ ^ + | | | | + +---------+ +--------------+ | + | KERNEL SPACE | | + ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~ + | USER SPACE | | + V | | + +---------+ +--------------+ + | | | | + | Process | | cachefilesd | + | | | | + +---------+ +--------------+ + + +FS-Cache does not follow the idea of completely loading every netfs file +opened in its entirety into a cache before permitting it to be accessed and +then serving the pages out of that cache rather than the netfs inode because: + + (1) It must be practical to operate without a cache. + + (2) The size of any accessible file must not be limited to the size of the + cache. + + (3) The combined size of all opened files (this includes mapped libraries) + must not be limited to the size of the cache. 
+ + (4) The user should not be forced to download an entire file just to do a + one-off access of a small portion of it (such as might be done with the + "file" program). + +It instead serves the cache out in PAGE_SIZE chunks as and when requested by +the netfs('s) using it. + + +FS-Cache provides the following facilities: + + (1) More than one cache can be used at once. Caches can be selected + explicitly by use of tags. + + (2) Caches can be added / removed at any time. + + (3) The netfs is provided with an interface that allows either party to + withdraw caching facilities from a file (required for (2)). + + (4) The interface to the netfs returns as few errors as possible, preferring + rather to let the netfs remain oblivious. + + (5) Cookies are used to represent indices, files and other objects to the + netfs. The simplest cookie is just a NULL pointer - indicating nothing + cached there. + + (6) The netfs is allowed to propose - dynamically - any index hierarchy it + desires, though it must be aware that the index search function is + recursive, stack space is limited, and indices can only be children of + indices. + + (7) Data I/O is done direct to and from the netfs's pages. The netfs + indicates that page A is at index B of the data-file represented by cookie + C, and that it should be read or written. The cache backend may or may + not start I/O on that page, but if it does, a netfs callback will be + invoked to indicate completion. The I/O may be either synchronous or + asynchronous. + + (8) Cookies can be "retired" upon release. At this point FS-Cache will mark + them as obsolete and the index hierarchy rooted at that point will get + recycled. + + (9) The netfs provides a "match" function for index searches. In addition to + saying whether a match was made or not, this can also specify that an + entry should be updated or deleted. + +(10) As much as possible is done asynchronously. 
+ + +FS-Cache maintains a virtual indexing tree in which all indices, files, objects +and pages are kept. Bits of this tree may actually reside in one or more +caches:: + + FSDEF + | + +------------------------------------+ + | | + NFS AFS + | | + +--------------------------+ +-----------+ + | | | | + homedir mirror afs.org redhat.com + | | | + +------------+ +---------------+ +----------+ + | | | | | | + 00001 00002 00007 00125 vol00001 vol00002 + | | | | | + +---+---+ +-----+ +---+ +------+------+ +-----+----+ + | | | | | | | | | | | | | + PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak + | | + PG0 +-------+ + | | + 00001 00003 + | + +---+---+ + | | | + PG0 PG1 PG2 + +In the example above, you can see two netfs's being backed: NFS and AFS. These +have different index hierarchies: + + * The NFS primary index contains per-server indices. Each server index is + indexed by NFS file handles to get data file objects. Each data file + objects can have an array of pages, but may also have further child + objects, such as extended attributes and directory entries. Extended + attribute objects themselves have page-array contents. + + * The AFS primary index contains per-cell indices. Each cell index contains + per-logical-volume indices. Each of volume index contains up to three + indices for the read-write, read-only and backup mirrors of those volumes. + Each of these contains vnode data file objects, each of which contains an + array of pages. + +The very top index is the FS-Cache master index in which individual netfs's +have entries. + +Any index object may reside in more than one cache, provided it only has index +children. Any index with non-index object children will be assumed to only +reside in one cache. 
+ + +The netfs API to FS-Cache can be found in: + + Documentation/filesystems/caching/netfs-api.txt + +The cache backend API to FS-Cache can be found in: + + Documentation/filesystems/caching/backend-api.txt + +A description of the internal representations and object state machine can be +found in: + + Documentation/filesystems/caching/object.rst + + +Statistical Information +======================= + +If FS-Cache is compiled with the following options enabled:: + + CONFIG_FSCACHE_STATS=y + CONFIG_FSCACHE_HISTOGRAM=y + +then it will gather certain statistics and display them through a number of +proc files. + +/proc/fs/fscache/stats +---------------------- + + This shows counts of a number of events that can happen in FS-Cache: + ++--------------+-------+-------------------------------------------------------+ +|CLASS |EVENT |MEANING | ++==============+=======+=======================================================+ +|Cookies |idx=N |Number of index cookies allocated | ++ +-------+-------------------------------------------------------+ +| |dat=N |Number of data storage cookies allocated | ++ +-------+-------------------------------------------------------+ +| |spc=N |Number of special cookies allocated | ++--------------+-------+-------------------------------------------------------+ +|Objects |alc=N |Number of objects allocated | ++ +-------+-------------------------------------------------------+ +| |nal=N |Number of object allocation failures | ++ +-------+-------------------------------------------------------+ +| |avl=N |Number of objects that reached the available state | ++ +-------+-------------------------------------------------------+ +| |ded=N |Number of objects that reached the dead state | ++--------------+-------+-------------------------------------------------------+ +|ChkAux |non=N |Number of objects that didn't have a coherency check | ++ +-------+-------------------------------------------------------+ +| |ok=N |Number of objects that passed a 
coherency check | ++ +-------+-------------------------------------------------------+ +| |upd=N |Number of objects that needed a coherency data update | ++ +-------+-------------------------------------------------------+ +| |obs=N |Number of objects that were declared obsolete | ++--------------+-------+-------------------------------------------------------+ +|Pages |mrk=N |Number of pages marked as being cached | +| |unc=N |Number of uncache page requests seen | ++--------------+-------+-------------------------------------------------------+ +|Acquire |n=N |Number of acquire cookie requests seen | ++ +-------+-------------------------------------------------------+ +| |nul=N |Number of acq reqs given a NULL parent | ++ +-------+-------------------------------------------------------+ +| |noc=N |Number of acq reqs rejected due to no cache available | ++ +-------+-------------------------------------------------------+ +| |ok=N |Number of acq reqs succeeded | ++ +-------+-------------------------------------------------------+ +| |nbf=N |Number of acq reqs rejected due to error | ++ +-------+-------------------------------------------------------+ +| |oom=N |Number of acq reqs failed on ENOMEM | ++--------------+-------+-------------------------------------------------------+ +|Lookups |n=N |Number of lookup calls made on cache backends | ++ +-------+-------------------------------------------------------+ +| |neg=N |Number of negative lookups made | ++ +-------+-------------------------------------------------------+ +| |pos=N |Number of positive lookups made | ++ +-------+-------------------------------------------------------+ +| |crt=N |Number of objects created by lookup | ++ +-------+-------------------------------------------------------+ +| |tmo=N |Number of lookups timed out and requeued | ++--------------+-------+-------------------------------------------------------+ +|Updates |n=N |Number of update cookie requests seen | ++ 
+-------+-------------------------------------------------------+ +| |nul=N |Number of upd reqs given a NULL parent | ++ +-------+-------------------------------------------------------+ +| |run=N |Number of upd reqs granted CPU time | ++--------------+-------+-------------------------------------------------------+ +|Relinqs |n=N |Number of relinquish cookie requests seen | ++ +-------+-------------------------------------------------------+ +| |nul=N |Number of rlq reqs given a NULL parent | ++ +-------+-------------------------------------------------------+ +| |wcr=N |Number of rlq reqs waited on completion of creation | ++--------------+-------+-------------------------------------------------------+ +|AttrChg |n=N |Number of attribute changed requests seen | ++ +-------+-------------------------------------------------------+ +| |ok=N |Number of attr changed requests queued | ++ +-------+-------------------------------------------------------+ +| |nbf=N |Number of attr changed rejected -ENOBUFS | ++ +-------+-------------------------------------------------------+ +| |oom=N |Number of attr changed failed -ENOMEM | ++ +-------+-------------------------------------------------------+ +| |run=N |Number of attr changed ops given CPU time | ++--------------+-------+-------------------------------------------------------+ +|Allocs |n=N |Number of allocation requests seen | ++ +-------+-------------------------------------------------------+ +| |ok=N |Number of successful alloc reqs | ++ +-------+-------------------------------------------------------+ +| |wt=N |Number of alloc reqs that waited on lookup completion | ++ +-------+-------------------------------------------------------+ +| |nbf=N |Number of alloc reqs rejected -ENOBUFS | ++ +-------+-------------------------------------------------------+ +| |int=N |Number of alloc reqs aborted -ERESTARTSYS | ++ +-------+-------------------------------------------------------+ +| |ops=N |Number of alloc reqs submitted 
| ++ +-------+-------------------------------------------------------+ +| |owt=N |Number of alloc reqs waited for CPU time | ++ +-------+-------------------------------------------------------+ +| |abt=N |Number of alloc reqs aborted due to object death | ++--------------+-------+-------------------------------------------------------+ +|Retrvls |n=N |Number of retrieval (read) requests seen | ++ +-------+-------------------------------------------------------+ +| |ok=N |Number of successful retr reqs | ++ +-------+-------------------------------------------------------+ +| |wt=N |Number of retr reqs that waited on lookup completion | ++ +-------+-------------------------------------------------------+ +| |nod=N |Number of retr reqs returned -ENODATA | ++ +-------+-------------------------------------------------------+ +| |nbf=N |Number of retr reqs rejected -ENOBUFS | ++ +-------+-------------------------------------------------------+ +| |int=N |Number of retr reqs aborted -ERESTARTSYS | ++ +-------+-------------------------------------------------------+ +| |oom=N |Number of retr reqs failed -ENOMEM | ++ +-------+-------------------------------------------------------+ +| |ops=N |Number of retr reqs submitted | ++ +-------+-------------------------------------------------------+ +| |owt=N |Number of retr reqs waited for CPU time | ++ +-------+-------------------------------------------------------+ +| |abt=N |Number of retr reqs aborted due to object death | ++--------------+-------+-------------------------------------------------------+ +|Stores |n=N |Number of storage (write) requests seen | ++ +-------+-------------------------------------------------------+ +| |ok=N |Number of successful store reqs | ++ +-------+-------------------------------------------------------+ +| |agn=N |Number of store reqs on a page already pending storage | ++ +-------+-------------------------------------------------------+ +| |nbf=N |Number of store reqs rejected -ENOBUFS | ++ 
+-------+-------------------------------------------------------+ +| |oom=N |Number of store reqs failed -ENOMEM | ++ +-------+-------------------------------------------------------+ +| |ops=N |Number of store reqs submitted | ++ +-------+-------------------------------------------------------+ +| |run=N |Number of store reqs granted CPU time | ++ +-------+-------------------------------------------------------+ +| |pgs=N |Number of pages given store req processing time | ++ +-------+-------------------------------------------------------+ +| |rxd=N |Number of store reqs deleted from tracking tree | ++ +-------+-------------------------------------------------------+ +| |olm=N |Number of store reqs over store limit | ++--------------+-------+-------------------------------------------------------+ +|VmScan |nos=N |Number of release reqs against pages with no | +| | |pending store | ++ +-------+-------------------------------------------------------+ +| |gon=N |Number of release reqs against pages stored by | +| | |time lock granted | ++ +-------+-------------------------------------------------------+ +| |bsy=N |Number of release reqs ignored due to in-progress store| ++ +-------+-------------------------------------------------------+ +| |can=N |Number of page stores cancelled due to release req | ++--------------+-------+-------------------------------------------------------+ +|Ops |pend=N |Number of times async ops added to pending queues | ++ +-------+-------------------------------------------------------+ +| |run=N |Number of times async ops given CPU time | ++ +-------+-------------------------------------------------------+ +| |enq=N |Number of times async ops queued for processing | ++ +-------+-------------------------------------------------------+ +| |can=N |Number of async ops cancelled | ++ +-------+-------------------------------------------------------+ +| |rej=N |Number of async ops rejected due to object | +| | |lookup/create failure | ++ 
+-------+-------------------------------------------------------+ +| |ini=N |Number of async ops initialised | ++ +-------+-------------------------------------------------------+ +| |dfr=N |Number of async ops queued for deferred release | ++ +-------+-------------------------------------------------------+ +| |rel=N |Number of async ops released | +| | |(should equal ini=N when idle) | ++ +-------+-------------------------------------------------------+ +| |gc=N |Number of deferred-release async ops garbage collected | ++--------------+-------+-------------------------------------------------------+ +|CacheOp |alo=N |Number of in-progress alloc_object() cache ops | ++ +-------+-------------------------------------------------------+ +| |luo=N |Number of in-progress lookup_object() cache ops | ++ +-------+-------------------------------------------------------+ +| |luc=N |Number of in-progress lookup_complete() cache ops | ++ +-------+-------------------------------------------------------+ +| |gro=N |Number of in-progress grab_object() cache ops | ++ +-------+-------------------------------------------------------+ +| |upo=N |Number of in-progress update_object() cache ops | ++ +-------+-------------------------------------------------------+ +| |dro=N |Number of in-progress drop_object() cache ops | ++ +-------+-------------------------------------------------------+ +| |pto=N |Number of in-progress put_object() cache ops | ++ +-------+-------------------------------------------------------+ +| |syn=N |Number of in-progress sync_cache() cache ops | ++ +-------+-------------------------------------------------------+ +| |atc=N |Number of in-progress attr_changed() cache ops | ++ +-------+-------------------------------------------------------+ +| |rap=N |Number of in-progress read_or_alloc_page() cache ops | ++ +-------+-------------------------------------------------------+ +| |ras=N |Number of in-progress read_or_alloc_pages() cache ops | ++ 
+-------+-------------------------------------------------------+ +| |alp=N |Number of in-progress allocate_page() cache ops | ++ +-------+-------------------------------------------------------+ +| |als=N |Number of in-progress allocate_pages() cache ops | ++ +-------+-------------------------------------------------------+ +| |wrp=N |Number of in-progress write_page() cache ops | ++ +-------+-------------------------------------------------------+ +| |ucp=N |Number of in-progress uncache_page() cache ops | ++ +-------+-------------------------------------------------------+ +| |dsp=N |Number of in-progress dissociate_pages() cache ops | ++--------------+-------+-------------------------------------------------------+ +|CacheEv |nsp=N |Number of object lookups/creations rejected due to | +| | |lack of space | ++ +-------+-------------------------------------------------------+ +| |stl=N |Number of stale objects deleted | ++ +-------+-------------------------------------------------------+ +| |rtr=N |Number of objects retired when relinquished | ++ +-------+-------------------------------------------------------+ +| |cul=N |Number of objects culled | ++--------------+-------+-------------------------------------------------------+ + + + +/proc/fs/fscache/histogram +-------------------------- + + :: + + cat /proc/fs/fscache/histogram + JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS + ===== ===== ========= ========= ========= ========= ========= + + This shows the breakdown of the number of times each amount of time + between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. 
The + columns are as follows: + + ========= ======================================================= + COLUMN TIME MEASUREMENT + ========= ======================================================= + OBJ INST Length of time to instantiate an object + OP RUNS Length of time a call to process an operation took + OBJ RUNS Length of time a call to process an object event took + RETRV DLY Time between requesting a read and lookup completing + RETRIEVLS Time between beginning and end of a retrieval + ========= ======================================================= + + Each row shows the number of events that took a particular range of times. + Each step is 1 jiffy in size. The JIFS column indicates the particular + jiffy range covered, and the SECS field the equivalent number of seconds. + + + +Object List +=========== + +If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a +list of all the objects currently allocated and allow them to be viewed +through:: + + /proc/fs/fscache/objects + +This will look something like:: + + [root@andromeda ~]# head /proc/fs/fscache/objects + OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA + ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ + 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a + 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a + +where the first set of columns before the '|' describe the object: + + ======= =============================================================== + COLUMN DESCRIPTION + ======= 
=============================================================== + OBJECT Object debugging ID (appears as OBJ%x in some debug messages) + PARENT Debugging ID of parent object + STAT Object state + CHLDN Number of child objects of this object + OPS Number of outstanding operations on this object + OOP Number of outstanding child object management operations + IPR + EX Number of outstanding exclusive operations + READS Number of outstanding read operations + EM Object's event mask + EV Events raised on this object + F Object flags + S Object work item busy state mask (1:pending 2:running) + ======= =============================================================== + +and the second set of columns describe the object's cookie, if present: + + ================ ====================================================== + COLUMN DESCRIPTION + ================ ====================================================== + NETFS_COOKIE_DEF Name of netfs cookie definition + TY Cookie type (IX - index, DT - data, hex - special) + FL Cookie flags + NETFS_DATA Netfs private data stored in the cookie + OBJECT_KEY Object key } 1 column, with separating comma + AUX_DATA Object aux data } presence may be configured + ================ ====================================================== + +The data shown may be filtered by attaching a key to an appropriate keyring +before viewing the file. 
Something like:: + + keyctl add user fscache:objlist <restrictions> @s + +where <restrictions> are a selection of the following letters: + + == ========================================================= + K Show hexdump of object key (don't show if not given) + A Show hexdump of object aux data (don't show if not given) + == ========================================================= + +and the following paired letters: + + == ========================================================= + C Show objects that have a cookie + c Show objects that don't have a cookie + B Show objects that are busy + b Show objects that aren't busy + W Show objects that have pending writes + w Show objects that don't have pending writes + R Show objects that have outstanding reads + r Show objects that don't have outstanding reads + S Show objects that have work queued + s Show objects that don't have work queued + == ========================================================= + +If neither side of a letter pair is given, then both are implied. For example: + + keyctl add user fscache:objlist KB @s + +shows objects that are busy, and lists their object keys, but does not dump +their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is +not implied. + +By default all objects and all fields will be shown. 
+ + +Debugging +========= + +If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime +debugging enabled by adjusting the value in:: + + /sys/module/fscache/parameters/debug + +This is a bitmask of debugging streams to enable: + + ======= ======= =============================== ======================= + BIT VALUE STREAM POINT + ======= ======= =============================== ======================= + 0 1 Cache management Function entry trace + 1 2 Function exit trace + 2 4 General + 3 8 Cookie management Function entry trace + 4 16 Function exit trace + 5 32 General + 6 64 Page handling Function entry trace + 7 128 Function exit trace + 8 256 General + 9 512 Operation management Function entry trace + 10 1024 Function exit trace + 11 2048 General + ======= ======= =============================== ======================= + +The appropriate set of values should be OR'd together and the result written to +the control file. For example:: + + echo $((1|8|64)) >/sys/module/fscache/parameters/debug + +will turn on all function entry debugging. diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt deleted file mode 100644 index 071ff50a774d..000000000000 --- a/Documentation/filesystems/caching/fscache.txt +++ /dev/null @@ -1,448 +0,0 @@ - ========================== - General Filesystem Caching - ========================== - -======== -OVERVIEW -======== - -This facility is a general purpose cache for network filesystems, though it -could be used for caching other things such as ISO9660 filesystems too. 
- -FS-Cache mediates between cache backends (such as CacheFS) and network -filesystems: - - +---------+ - | | +--------------+ - | NFS |--+ | | - | | | +-->| CacheFS | - +---------+ | +----------+ | | /dev/hda5 | - | | | | +--------------+ - +---------+ +-->| | | - | | | |--+ - | AFS |----->| FS-Cache | - | | | |--+ - +---------+ +-->| | | - | | | | +--------------+ - +---------+ | +----------+ | | | - | | | +-->| CacheFiles | - | ISOFS |--+ | /var/cache | - | | +--------------+ - +---------+ - -Or to look at it another way, FS-Cache is a module that provides a caching -facility to a network filesystem such that the cache is transparent to the -user: - - +---------+ - | | - | Server | - | | - +---------+ - | NETWORK - ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - | - | +----------+ - V | | - +---------+ | | - | | | | - | NFS |----->| FS-Cache | - | | | |--+ - +---------+ | | | +--------------+ +--------------+ - | | | | | | | | - V +----------+ +-->| CacheFiles |-->| Ext3 | - +---------+ | /var/cache | | /dev/sda6 | - | | +--------------+ +--------------+ - | VFS | ^ ^ - | | | | - +---------+ +--------------+ | - | KERNEL SPACE | | - ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~ - | USER SPACE | | - V | | - +---------+ +--------------+ - | | | | - | Process | | cachefilesd | - | | | | - +---------+ +--------------+ - - -FS-Cache does not follow the idea of completely loading every netfs file -opened in its entirety into a cache before permitting it to be accessed and -then serving the pages out of that cache rather than the netfs inode because: - - (1) It must be practical to operate without a cache. - - (2) The size of any accessible file must not be limited to the size of the - cache. - - (3) The combined size of all opened files (this includes mapped libraries) - must not be limited to the size of the cache. 
- - (4) The user should not be forced to download an entire file just to do a - one-off access of a small portion of it (such as might be done with the - "file" program). - -It instead serves the cache out in PAGE_SIZE chunks as and when requested by -the netfs('s) using it. - - -FS-Cache provides the following facilities: - - (1) More than one cache can be used at once. Caches can be selected - explicitly by use of tags. - - (2) Caches can be added / removed at any time. - - (3) The netfs is provided with an interface that allows either party to - withdraw caching facilities from a file (required for (2)). - - (4) The interface to the netfs returns as few errors as possible, preferring - rather to let the netfs remain oblivious. - - (5) Cookies are used to represent indices, files and other objects to the - netfs. The simplest cookie is just a NULL pointer - indicating nothing - cached there. - - (6) The netfs is allowed to propose - dynamically - any index hierarchy it - desires, though it must be aware that the index search function is - recursive, stack space is limited, and indices can only be children of - indices. - - (7) Data I/O is done direct to and from the netfs's pages. The netfs - indicates that page A is at index B of the data-file represented by cookie - C, and that it should be read or written. The cache backend may or may - not start I/O on that page, but if it does, a netfs callback will be - invoked to indicate completion. The I/O may be either synchronous or - asynchronous. - - (8) Cookies can be "retired" upon release. At this point FS-Cache will mark - them as obsolete and the index hierarchy rooted at that point will get - recycled. - - (9) The netfs provides a "match" function for index searches. In addition to - saying whether a match was made or not, this can also specify that an - entry should be updated or deleted. - -(10) As much as possible is done asynchronously. 
- - -FS-Cache maintains a virtual indexing tree in which all indices, files, objects -and pages are kept. Bits of this tree may actually reside in one or more -caches. - - FSDEF - | - +------------------------------------+ - | | - NFS AFS - | | - +--------------------------+ +-----------+ - | | | | - homedir mirror afs.org redhat.com - | | | - +------------+ +---------------+ +----------+ - | | | | | | - 00001 00002 00007 00125 vol00001 vol00002 - | | | | | - +---+---+ +-----+ +---+ +------+------+ +-----+----+ - | | | | | | | | | | | | | -PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak - | | - PG0 +-------+ - | | - 00001 00003 - | - +---+---+ - | | | - PG0 PG1 PG2 - -In the example above, you can see two netfs's being backed: NFS and AFS. These -have different index hierarchies: - - (*) The NFS primary index contains per-server indices. Each server index is - indexed by NFS file handles to get data file objects. Each data file - objects can have an array of pages, but may also have further child - objects, such as extended attributes and directory entries. Extended - attribute objects themselves have page-array contents. - - (*) The AFS primary index contains per-cell indices. Each cell index contains - per-logical-volume indices. Each of volume index contains up to three - indices for the read-write, read-only and backup mirrors of those volumes. - Each of these contains vnode data file objects, each of which contains an - array of pages. - -The very top index is the FS-Cache master index in which individual netfs's -have entries. - -Any index object may reside in more than one cache, provided it only has index -children. Any index with non-index object children will be assumed to only -reside in one cache. 
- - -The netfs API to FS-Cache can be found in: - - Documentation/filesystems/caching/netfs-api.txt - -The cache backend API to FS-Cache can be found in: - - Documentation/filesystems/caching/backend-api.txt - -A description of the internal representations and object state machine can be -found in: - - Documentation/filesystems/caching/object.rst - - -======================= -STATISTICAL INFORMATION -======================= - -If FS-Cache is compiled with the following options enabled: - - CONFIG_FSCACHE_STATS=y - CONFIG_FSCACHE_HISTOGRAM=y - -then it will gather certain statistics and display them through a number of -proc files. - - (*) /proc/fs/fscache/stats - - This shows counts of a number of events that can happen in FS-Cache: - - CLASS EVENT MEANING - ======= ======= ======================================================= - Cookies idx=N Number of index cookies allocated - dat=N Number of data storage cookies allocated - spc=N Number of special cookies allocated - Objects alc=N Number of objects allocated - nal=N Number of object allocation failures - avl=N Number of objects that reached the available state - ded=N Number of objects that reached the dead state - ChkAux non=N Number of objects that didn't have a coherency check - ok=N Number of objects that passed a coherency check - upd=N Number of objects that needed a coherency data update - obs=N Number of objects that were declared obsolete - Pages mrk=N Number of pages marked as being cached - unc=N Number of uncache page requests seen - Acquire n=N Number of acquire cookie requests seen - nul=N Number of acq reqs given a NULL parent - noc=N Number of acq reqs rejected due to no cache available - ok=N Number of acq reqs succeeded - nbf=N Number of acq reqs rejected due to error - oom=N Number of acq reqs failed on ENOMEM - Lookups n=N Number of lookup calls made on cache backends - neg=N Number of negative lookups made - pos=N Number of positive lookups made - crt=N Number of objects created by lookup - 
tmo=N Number of lookups timed out and requeued - Updates n=N Number of update cookie requests seen - nul=N Number of upd reqs given a NULL parent - run=N Number of upd reqs granted CPU time - Relinqs n=N Number of relinquish cookie requests seen - nul=N Number of rlq reqs given a NULL parent - wcr=N Number of rlq reqs waited on completion of creation - AttrChg n=N Number of attribute changed requests seen - ok=N Number of attr changed requests queued - nbf=N Number of attr changed rejected -ENOBUFS - oom=N Number of attr changed failed -ENOMEM - run=N Number of attr changed ops given CPU time - Allocs n=N Number of allocation requests seen - ok=N Number of successful alloc reqs - wt=N Number of alloc reqs that waited on lookup completion - nbf=N Number of alloc reqs rejected -ENOBUFS - int=N Number of alloc reqs aborted -ERESTARTSYS - ops=N Number of alloc reqs submitted - owt=N Number of alloc reqs waited for CPU time - abt=N Number of alloc reqs aborted due to object death - Retrvls n=N Number of retrieval (read) requests seen - ok=N Number of successful retr reqs - wt=N Number of retr reqs that waited on lookup completion - nod=N Number of retr reqs returned -ENODATA - nbf=N Number of retr reqs rejected -ENOBUFS - int=N Number of retr reqs aborted -ERESTARTSYS - oom=N Number of retr reqs failed -ENOMEM - ops=N Number of retr reqs submitted - owt=N Number of retr reqs waited for CPU time - abt=N Number of retr reqs aborted due to object death - Stores n=N Number of storage (write) requests seen - ok=N Number of successful store reqs - agn=N Number of store reqs on a page already pending storage - nbf=N Number of store reqs rejected -ENOBUFS - oom=N Number of store reqs failed -ENOMEM - ops=N Number of store reqs submitted - run=N Number of store reqs granted CPU time - pgs=N Number of pages given store req processing time - rxd=N Number of store reqs deleted from tracking tree - olm=N Number of store reqs over store limit - VmScan nos=N Number of release reqs 
against pages with no pending store - gon=N Number of release reqs against pages stored by time lock granted - bsy=N Number of release reqs ignored due to in-progress store - can=N Number of page stores cancelled due to release req - Ops pend=N Number of times async ops added to pending queues - run=N Number of times async ops given CPU time - enq=N Number of times async ops queued for processing - can=N Number of async ops cancelled - rej=N Number of async ops rejected due to object lookup/create failure - ini=N Number of async ops initialised - dfr=N Number of async ops queued for deferred release - rel=N Number of async ops released (should equal ini=N when idle) - gc=N Number of deferred-release async ops garbage collected - CacheOp alo=N Number of in-progress alloc_object() cache ops - luo=N Number of in-progress lookup_object() cache ops - luc=N Number of in-progress lookup_complete() cache ops - gro=N Number of in-progress grab_object() cache ops - upo=N Number of in-progress update_object() cache ops - dro=N Number of in-progress drop_object() cache ops - pto=N Number of in-progress put_object() cache ops - syn=N Number of in-progress sync_cache() cache ops - atc=N Number of in-progress attr_changed() cache ops - rap=N Number of in-progress read_or_alloc_page() cache ops - ras=N Number of in-progress read_or_alloc_pages() cache ops - alp=N Number of in-progress allocate_page() cache ops - als=N Number of in-progress allocate_pages() cache ops - wrp=N Number of in-progress write_page() cache ops - ucp=N Number of in-progress uncache_page() cache ops - dsp=N Number of in-progress dissociate_pages() cache ops - CacheEv nsp=N Number of object lookups/creations rejected due to lack of space - stl=N Number of stale objects deleted - rtr=N Number of objects retired when relinquished - cul=N Number of objects culled - - - (*) /proc/fs/fscache/histogram - - cat /proc/fs/fscache/histogram - JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS - ===== ===== 
========= ========= ========= ========= ========= - - This shows the breakdown of the number of times each amount of time - between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The - columns are as follows: - - COLUMN TIME MEASUREMENT - ======= ======================================================= - OBJ INST Length of time to instantiate an object - OP RUNS Length of time a call to process an operation took - OBJ RUNS Length of time a call to process an object event took - RETRV DLY Time between an requesting a read and lookup completing - RETRIEVLS Time between beginning and end of a retrieval - - Each row shows the number of events that took a particular range of times. - Each step is 1 jiffy in size. The JIFS column indicates the particular - jiffy range covered, and the SECS field the equivalent number of seconds. - - -=========== -OBJECT LIST -=========== - -If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a -list of all the objects currently allocated and allow them to be viewed -through: - - /proc/fs/fscache/objects - -This will look something like: - - [root@andromeda ~]# head /proc/fs/fscache/objects - OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA - ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ - 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a - 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a - -where the first set of columns before the '|' describe the object: - - COLUMN DESCRIPTION - ======= 
=============================================================== - OBJECT Object debugging ID (appears as OBJ%x in some debug messages) - PARENT Debugging ID of parent object - STAT Object state - CHLDN Number of child objects of this object - OPS Number of outstanding operations on this object - OOP Number of outstanding child object management operations - IPR - EX Number of outstanding exclusive operations - READS Number of outstanding read operations - EM Object's event mask - EV Events raised on this object - F Object flags - S Object work item busy state mask (1:pending 2:running) - -and the second set of columns describe the object's cookie, if present: - - COLUMN DESCRIPTION - =============== ======================================================= - NETFS_COOKIE_DEF Name of netfs cookie definition - TY Cookie type (IX - index, DT - data, hex - special) - FL Cookie flags - NETFS_DATA Netfs private data stored in the cookie - OBJECT_KEY Object key } 1 column, with separating comma - AUX_DATA Object aux data } presence may be configured - -The data shown may be filtered by attaching a key to an appropriate keyring -before viewing the file. Something like: - - keyctl add user fscache:objlist <restrictions> @s - -where <restrictions> are a selection of the following letters: - - K Show hexdump of object key (don't show if not given) - A Show hexdump of object aux data (don't show if not given) - -and the following paired letters: - - C Show objects that have a cookie - c Show objects that don't have a cookie - B Show objects that are busy - b Show objects that aren't busy - W Show objects that have pending writes - w Show objects that don't have pending writes - R Show objects that have outstanding reads - r Show objects that don't have outstanding reads - S Show objects that have work queued - s Show objects that don't have work queued - -If neither side of a letter pair is given, then both are implied. 
For example: - - keyctl add user fscache:objlist KB @s - -shows objects that are busy, and lists their object keys, but does not dump -their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is -not implied. - -By default all objects and all fields will be shown. - - -========= -DEBUGGING -========= - -If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime -debugging enabled by adjusting the value in: - - /sys/module/fscache/parameters/debug - -This is a bitmask of debugging streams to enable: - - BIT VALUE STREAM POINT - ======= ======= =============================== ======================= - 0 1 Cache management Function entry trace - 1 2 Function exit trace - 2 4 General - 3 8 Cookie management Function entry trace - 4 16 Function exit trace - 5 32 General - 6 64 Page handling Function entry trace - 7 128 Function exit trace - 8 256 General - 9 512 Operation management Function entry trace - 10 1024 Function exit trace - 11 2048 General - -The appropriate set of values should be OR'd together and the result written to -the control file. For example: - - echo $((1|8|64)) >/sys/module/fscache/parameters/debug - -will turn on all function entry debugging. diff --git a/Documentation/filesystems/caching/index.rst b/Documentation/filesystems/caching/index.rst index e5ec95ff0be2..f488747630aa 100644 --- a/Documentation/filesystems/caching/index.rst +++ b/Documentation/filesystems/caching/index.rst @@ -6,4 +6,5 @@ Filesystem Caching .. toctree:: :maxdepth: 2 + fscache object diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index 506c5e643f0d..5e796e6c38e5 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig @@ -8,7 +8,7 @@ config FSCACHE Different sorts of caches can be plugged in, depending on the resources available. - See Documentation/filesystems/caching/fscache.txt for more information. + See Documentation/filesystems/caching/fscache.rst for more information. 
config FSCACHE_STATS bool "Gather statistical information on local caching" @@ -25,7 +25,7 @@ config FSCACHE_STATS between CPUs. On the other hand, the stats are very useful for debugging purposes. Saying 'Y' here is recommended. - See Documentation/filesystems/caching/fscache.txt for more information. + See Documentation/filesystems/caching/fscache.rst for more information. config FSCACHE_HISTOGRAM bool "Gather latency information on local caching" @@ -42,7 +42,7 @@ config FSCACHE_HISTOGRAM bouncing between CPUs. On the other hand, the histogram may be useful for debugging purposes. Saying 'N' here is recommended. - See Documentation/filesystems/caching/fscache.txt for more information. + See Documentation/filesystems/caching/fscache.rst for more information. config FSCACHE_DEBUG bool "Debug FS-Cache" @@ -52,7 +52,7 @@ config FSCACHE_DEBUG management module. If this is set, the debugging output may be enabled by setting bits in /sys/modules/fscache/parameter/debug. - See Documentation/filesystems/caching/fscache.txt for more information. + See Documentation/filesystems/caching/fscache.rst for more information. config FSCACHE_OBJECT_LIST bool "Maintain global object list for debugging purposes" -- cgit v1.2.3 From efc930fa1d844869eb65586f8cdd6a7837db3607 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:55 +0200 Subject: docs: filesystems: caching/netfs-api.txt: convert it to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/caching/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/cfe4cb1bf8e1f0093d44c30801ec42e74721e543.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/caching/fscache.rst | 2 +- Documentation/filesystems/caching/index.rst | 1 + Documentation/filesystems/caching/netfs-api.rst | 896 +++++++++++++++++++++++ Documentation/filesystems/caching/netfs-api.txt | 910 ------------------------ fs/fscache/cookie.c | 2 +- include/linux/fscache.h | 42 +- 6 files changed, 920 insertions(+), 933 deletions(-) create mode 100644 Documentation/filesystems/caching/netfs-api.rst delete mode 100644 Documentation/filesystems/caching/netfs-api.txt diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst index 950602147aa5..dd1297d884d0 100644 --- a/Documentation/filesystems/caching/fscache.rst +++ b/Documentation/filesystems/caching/fscache.rst @@ -183,7 +183,7 @@ reside in one cache. The netfs API to FS-Cache can be found in: - Documentation/filesystems/caching/netfs-api.txt + Documentation/filesystems/caching/netfs-api.rst The cache backend API to FS-Cache can be found in: diff --git a/Documentation/filesystems/caching/index.rst b/Documentation/filesystems/caching/index.rst index f488747630aa..d0651db450fb 100644 --- a/Documentation/filesystems/caching/index.rst +++ b/Documentation/filesystems/caching/index.rst @@ -8,3 +8,4 @@ Filesystem Caching fscache object + netfs-api diff --git a/Documentation/filesystems/caching/netfs-api.rst b/Documentation/filesystems/caching/netfs-api.rst new file mode 100644 index 000000000000..d9f14b8610ba --- /dev/null +++ b/Documentation/filesystems/caching/netfs-api.rst @@ -0,0 +1,896 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +FS-Cache Network Filesystem API +=============================== + +There's an API by which a network filesystem can make use of the FS-Cache +facilities. 
This is based around a number of principles: + + (1) Caches can store a number of different object types. There are two main + object types: indices and files. The first is a special type used by + FS-Cache to make finding objects faster and to make retiring of groups of + objects easier. + + (2) Every index, file or other object is represented by a cookie. This cookie + may or may not have anything associated with it, but the netfs doesn't + need to care. + + (3) Barring the top-level index (one entry per cached netfs), the index + hierarchy for each netfs is structured according to the whim of the netfs. + +This API is declared in <linux/fscache.h>. + +.. This document contains the following sections: + + (1) Network filesystem definition + (2) Index definition + (3) Object definition + (4) Network filesystem (un)registration + (5) Cache tag lookup + (6) Index registration + (7) Data file registration + (8) Miscellaneous object registration + (9) Setting the data file size + (10) Page alloc/read/write + (11) Page uncaching + (12) Index and data file consistency + (13) Cookie enablement + (14) Miscellaneous cookie operations + (15) Cookie unregistration + (16) Index invalidation + (17) Data file invalidation + (18) FS-Cache specific page flags. + + +Network Filesystem Definition +============================= + +FS-Cache needs a description of the network filesystem. This is specified +using a record of the following structure:: + + struct fscache_netfs { + uint32_t version; + const char *name; + struct fscache_cookie *primary_index; + ... + }; + +The first two fields should be filled in before registration, and the third +will be filled in by the registration function; any other fields should just be +ignored and are for internal use only. + +The fields are: + + (1) The name of the netfs (used as the key in the toplevel index). 
+ + (2) The version of the netfs (if the name matches but the version doesn't, the + entire in-cache hierarchy for this netfs will be scrapped and begun + afresh). + + (3) The cookie representing the primary index will be allocated according to + another parameter passed into the registration function. + +For example, kAFS (linux/fs/afs/) uses the following definitions to describe +itself:: + + struct fscache_netfs afs_cache_netfs = { + .version = 0, + .name = "afs", + }; + + +Index Definition +================ + +Indices are used for two purposes: + + (1) To aid the finding of a file based on a series of keys (such as AFS's + "cell", "volume ID", "vnode ID"). + + (2) To make it easier to discard a subset of all the files cached based around + a particular key - for instance to mirror the removal of an AFS volume. + +However, since it's unlikely that any two netfs's are going to want to define +their index hierarchies in quite the same way, FS-Cache tries to impose as few +restraints as possible on how an index is structured and where it is placed in +the tree. The netfs can even mix indices and data files at the same level, but +it's not recommended. + +Each index entry consists of a key of indeterminate length plus some auxiliary +data, also of indeterminate length. + +There are some limits on indices: + + (1) Any index containing non-index objects should be restricted to a single + cache. Any such objects created within an index will be created in the + first cache only. The cache in which an index is created can be + controlled by cache tags (see below). + + (2) The entry data must be atomically journallable, so it is limited to about + 400 bytes at present. At least 400 bytes will be available. + + (3) The depth of the index tree should be judged with care as the search + function is recursive. Too many layers will run the kernel out of stack. 
+ + +Object Definition +================= + +To define an object, a structure of the following type should be filled out:: + + struct fscache_cookie_def + { + uint8_t name[16]; + uint8_t type; + + struct fscache_cache_tag *(*select_cache)( + const void *parent_netfs_data, + const void *cookie_netfs_data); + + enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, + const void *data, + uint16_t datalen, + loff_t object_size); + + void (*get_context)(void *cookie_netfs_data, void *context); + + void (*put_context)(void *cookie_netfs_data, void *context); + + void (*mark_pages_cached)(void *cookie_netfs_data, + struct address_space *mapping, + struct pagevec *cached_pvec); + }; + +This has the following fields: + + (1) The type of the object [mandatory]. + + This is one of the following values: + + FSCACHE_COOKIE_TYPE_INDEX + This defines an index, which is a special FS-Cache type. + + FSCACHE_COOKIE_TYPE_DATAFILE + This defines an ordinary data file. + + Any other value between 2 and 255 + This defines an extraordinary object such as an XATTR. + + (2) The name of the object type (NUL terminated unless all 16 chars are used) + [optional]. + + (3) A function to select the cache in which to store an index [optional]. + + This function is invoked when an index needs to be instantiated in a cache + during the instantiation of a non-index object. Only the immediate index + parent for the non-index object will be queried. Any indices above that + in the hierarchy may be stored in multiple caches. This function does not + need to be supplied for any non-index object or any index that will only + have index children. + + If this function is not supplied or if it returns NULL then the first + cache in the parent's list will be chosen, or failing that, the first + cache in the master list. + + (4) A function to check the auxiliary data [optional]. + + This function will be called to check that a match found in the cache for + this object is valid. 
For instance with AFS it could check the auxiliary + data against the data version number returned by the server to determine + whether the index entry in a cache is still valid. + + If this function is absent, it will be assumed that matching objects in a + cache are always valid. + + The function is also passed the cache's idea of the object size and may + use this to manage coherency also. + + If present, the function should return one of the following values: + + FSCACHE_CHECKAUX_OKAY + - the entry is okay as is + + FSCACHE_CHECKAUX_NEEDS_UPDATE + - the entry requires update + + FSCACHE_CHECKAUX_OBSOLETE + - the entry should be deleted + + This function can also be used to extract data from the auxiliary data in + the cache and copy it into the netfs's structures. + + (5) A pair of functions to manage contexts for the completion callback + [optional]. + + The cache read/write functions are passed a context which is then passed + to the I/O completion callback function. To ensure this context remains + valid until after the I/O completion is called, two functions may be + provided: one to get an extra reference on the context, and one to drop a + reference to it. + + If the context is not used or is a type of object that won't go out of + scope, then these functions are not required. These functions are not + required for indices as indices may not contain data. These functions may + be called in interrupt context and so may not sleep. + + (6) A function to mark a page as retaining cache metadata [optional]. + + This is called by the cache to indicate that it is retaining in-memory + information for this page and that the netfs should uncache the page when + it has finished. This does not indicate whether there's data on the disk + or not. Note that several pages at once may be presented for marking. + + The PG_fscache bit is set on the pages before this function would be + called, so the function need not be provided if this is sufficient. 
+ + This function is not required for indices as they're not permitted data. + + (7) A function to unmark all the pages retaining cache metadata [mandatory]. + + This is called by FS-Cache to indicate that a backing store is being + unbound from a cookie and that all the marks on the pages should be + cleared to prevent confusion. Note that the cache will have torn down all + its tracking information so that the pages don't need to be explicitly + uncached. + + This function is not required for indices as they're not permitted data. + + +Network Filesystem (Un)registration +=================================== + +The first step is to declare the network filesystem to the cache. This also +involves specifying the layout of the primary index (for AFS, this would be the +"cell" level). + +The registration function is:: + + int fscache_register_netfs(struct fscache_netfs *netfs); + +It just takes a pointer to the netfs definition. It returns 0 or an error as +appropriate. + +For kAFS, registration is done as follows:: + + ret = fscache_register_netfs(&afs_cache_netfs); + +The last step is, of course, unregistration:: + + void fscache_unregister_netfs(struct fscache_netfs *netfs); + + +Cache Tag Lookup +================ + +FS-Cache permits the use of more than one cache. To permit particular index +subtrees to be bound to particular caches, the second step is to look up cache +representation tags. This step is optional; it can be left entirely up to +FS-Cache as to which cache should be used. The problem with doing that is that +FS-Cache will always pick the first cache that was registered. + +To get the representation for a named tag:: + + struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); + +This takes a text string as the name and returns a representation of a tag. It +will never return an error. It may return a dummy tag, however, if it runs out +of memory; this will inhibit caching with this tag. 
+ +Any representation so obtained must be released by passing it to this function:: + + void fscache_release_cache_tag(struct fscache_cache_tag *tag); + +The tag will be retrieved by FS-Cache when it calls the object definition +operation select_cache(). + + +Index Registration +================== + +The third step is to inform FS-Cache about part of an index hierarchy that can +be used to locate files. This is done by requesting a cookie for each index in +the path to the file:: + + struct fscache_cookie * + fscache_acquire_cookie(struct fscache_cookie *parent, + const struct fscache_object_def *def, + const void *index_key, + size_t index_key_len, + const void *aux_data, + size_t aux_data_len, + void *netfs_data, + loff_t object_size, + bool enable); + +This function creates an index entry in the index represented by parent, +filling in the index entry by calling the operations pointed to by def. + +A unique key that represents the object within the parent must be pointed to by +index_key and is of length index_key_len. + +An optional blob of auxiliary data that is to be stored within the cache can be +pointed to with aux_data and should be of length aux_data_len. This would +typically be used for storing coherency data. + +The netfs may pass an arbitrary value in netfs_data and this will be presented +to it in the event of any calling back. This may also be used in tracing or +logging of messages. + +The cache tracks the size of the data attached to an object and this set to be +object_size. For indices, this should be 0. This value will be passed to the +->check_aux() callback. + +Note that this function never returns an error - all errors are handled +internally. It may, however, return NULL to indicate no cookie. It is quite +acceptable to pass this token back to this function as the parent to another +acquisition (or even to the relinquish cookie, read page and write page +functions - see below). 
+ +Note also that no indices are actually created in a cache until a non-index +object needs to be created somewhere down the hierarchy. Furthermore, an index +may be created in several different caches independently at different times. +This is all handled transparently, and the netfs doesn't see any of it. + +A cookie will be created in the disabled state if enabled is false. A cookie +must be enabled to do anything with it. A disabled cookie can be enabled by +calling fscache_enable_cookie() (see below). + +For example, with AFS, a cell would be added to the primary index. This index +entry would have a dependent inode containing volume mappings within this cell:: + + cell->cache = + fscache_acquire_cookie(afs_cache_netfs.primary_index, + &afs_cell_cache_index_def, + cell->name, strlen(cell->name), + NULL, 0, + cell, 0, true); + +And then a particular volume could be added to that index by ID, creating +another index for vnodes (AFS inode equivalents):: + + volume->cache = + fscache_acquire_cookie(volume->cell->cache, + &afs_volume_cache_index_def, + &volume->vid, sizeof(volume->vid), + NULL, 0, + volume, 0, true); + + +Data File Registration +====================== + +The fourth step is to request a data file be created in the cache. This is +identical to index cookie acquisition. The only difference is that the type in +the object definition should be something other than index type:: + + vnode->cache = + fscache_acquire_cookie(volume->cache, + &afs_vnode_cache_object_def, + &key, sizeof(key), + &aux, sizeof(aux), + vnode, vnode->status.size, true); + + +Miscellaneous Object Registration +================================= + +An optional step is to request an object of miscellaneous type be created in +the cache. This is almost identical to index cookie acquisition. The only +difference is that the type in the object definition should be something other +than index type. 
While the parent object could be an index, it's more likely +it would be some other type of object such as a data file:: + + xattr->cache = + fscache_acquire_cookie(vnode->cache, + &afs_xattr_cache_object_def, + &xattr->name, strlen(xattr->name), + NULL, 0, + xattr, strlen(xattr->val), true); + +Miscellaneous objects might be used to store extended attributes or directory +entries for example. + + +Setting the Data File Size +========================== + +The fifth step is to set the physical attributes of the file, such as its size. +This doesn't automatically reserve any space in the cache, but permits the +cache to adjust its metadata for data tracking appropriately:: + + int fscache_attr_changed(struct fscache_cookie *cookie); + +The cache will return -ENOBUFS if there is no backing cache or if there is no +space to allocate any extra metadata required in the cache. + +Note that attempts to read or write data pages in the cache over this size may +be rebuffed with -ENOBUFS. + +This operation schedules an attribute adjustment to happen asynchronously at +some point in the future, and as such, it may happen after the function returns +to the caller. The attribute adjustment excludes read and write operations. + + +Page alloc/read/write +===================== + +And the sixth step is to store and retrieve pages in the cache. There are +three functions that are used to do this. + +Note: + + (1) A page should not be re-read or re-allocated without uncaching it first. + + (2) A read or allocated page must be uncached when the netfs page is released + from the pagecache. + + (3) A page should only be written to the cache if previous read or allocated. + +This permits the cache to maintain its page tracking in proper order. 
+ + +PAGE READ +--------- + +Firstly, the netfs should ask FS-Cache to examine the caches and read the +contents cached for a particular page of a particular file if present, or else +allocate space to store the contents if not:: + + typedef + void (*fscache_rw_complete_t)(struct page *page, + void *context, + int error); + + int fscache_read_or_alloc_page(struct fscache_cookie *cookie, + struct page *page, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +The cookie argument must specify a cookie for an object that isn't an index, +the page specified will have the data loaded into it (and is also used to +specify the page number), and the gfp argument is used to control how any +memory allocations made are satisfied. + +If the cookie indicates the inode is not cached: + + (1) The function will return -ENOBUFS. + +Else if there's a copy of the page resident in the cache: + + (1) The mark_pages_cached() cookie operation will be called on that page. + + (2) The function will submit a request to read the data from the cache's + backing device directly into the page specified. + + (3) The function will return 0. + + (4) When the read is complete, end_io_func() will be invoked with: + + * The netfs data supplied when the cookie was created. + + * The page descriptor. + + * The context argument passed to the above function. This will be + maintained with the get_context/put_context functions mentioned above. + + * An argument that's 0 on success or negative for an error code. + + If an error occurs, it should be assumed that the page contains no usable + data. fscache_readpages_cancel() may need to be called. + + end_io_func() will be called in process context if the read results in + an error, but it might be called in interrupt context if the read is + successful. + +Otherwise, if there's not a copy available in cache, but the cache may be able +to store the page: + + (1) The mark_pages_cached() cookie operation will be called on that page. 
+ + (2) A block may be reserved in the cache and attached to the object at the + appropriate place. + + (3) The function will return -ENODATA. + +This function may also return -ENOMEM or -EINTR, in which case it won't have +read any data from the cache. + + +Page Allocate +------------- + +Alternatively, if there's not expected to be any data in the cache for a page +because the file has been extended, a block can simply be allocated instead:: + + int fscache_alloc_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This is similar to the fscache_read_or_alloc_page() function, except that it +never reads from the cache. It will return 0 if a block has been allocated, +rather than -ENODATA as the other would. One or the other must be performed +before writing to the cache. + +The mark_pages_cached() cookie operation will be called on the page if +successful. + + +Page Write +---------- + +Secondly, if the netfs changes the contents of the page (either due to an +initial download or if a user performs a write), then the page should be +written back to the cache:: + + int fscache_write_page(struct fscache_cookie *cookie, + struct page *page, + loff_t object_size, + gfp_t gfp); + +The cookie argument must specify a data file cookie, the page specified should +contain the data to be written (and is also used to specify the page number), +object_size is the revised size of the object and the gfp argument is used to +control how any memory allocations made are satisfied. + +The page must have first been read or allocated successfully and must not have +been uncached before writing is performed. + +If the cookie indicates the inode is not cached then: + + (1) The function will return -ENOBUFS. + +Else if space can be allocated in the cache to hold this page: + + (1) PG_fscache_write will be set on the page. + + (2) The function will submit a request to write the data to cache's backing + device directly from the page specified. 
+ + (3) The function will return 0. + + (4) When the write is complete PG_fscache_write is cleared on the page and + anyone waiting for that bit will be woken up. + +Else if there's no space available in the cache, -ENOBUFS will be returned. It +is also possible for the PG_fscache_write bit to be cleared when no write took +place if unforeseen circumstances arose (such as a disk error). + +Writing takes place asynchronously. + + +Multiple Page Read +------------------ + +A facility is provided to read several pages at once, as requested by the +readpages() address space operation:: + + int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, + struct address_space *mapping, + struct list_head *pages, + int *nr_pages, + fscache_rw_complete_t end_io_func, + void *context, + gfp_t gfp); + +This works in a similar way to fscache_read_or_alloc_page(), except: + + (1) Any page it can retrieve data for is removed from pages and nr_pages and + dispatched for reading to the disk. Reads of adjacent pages on disk may + be merged for greater efficiency. + + (2) The mark_pages_cached() cookie operation will be called on several pages + at once if they're being read or allocated. + + (3) If there was an general error, then that error will be returned. + + Else if some pages couldn't be allocated or read, then -ENOBUFS will be + returned. + + Else if some pages couldn't be read but were allocated, then -ENODATA will + be returned. + + Otherwise, if all pages had reads dispatched, then 0 will be returned, the + list will be empty and ``*nr_pages`` will be 0. + + (4) end_io_func will be called once for each page being read as the reads + complete. It will be called in process context if error != 0, but it may + be called in interrupt context if there is no error. + +Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude +some of the pages being read and some being allocated. Those pages will have +been marked appropriately and will need uncaching. 
+ + +Cancellation of Unread Pages +---------------------------- + +If one or more pages are passed to fscache_read_or_alloc_pages() but not then +read from the cache and also not read from the underlying filesystem then +those pages will need to have any marks and reservations removed. This can be +done by calling:: + + void fscache_readpages_cancel(struct fscache_cookie *cookie, + struct list_head *pages); + +prior to returning to the caller. The cookie argument should be as passed to +fscache_read_or_alloc_pages(). Every page in the pages list will be examined +and any that have PG_fscache set will be uncached. + + +Page Uncaching +============== + +To uncache a page, this function should be called:: + + void fscache_uncache_page(struct fscache_cookie *cookie, + struct page *page); + +This function permits the cache to release any in-memory representation it +might be holding for this netfs page. This function must be called once for +each page on which the read or write page functions above have been called to +make sure the cache's in-memory tracking information gets torn down. + +Note that pages can't be explicitly deleted from a data file. The whole +data file must be retired (see the relinquish cookie function below). + +Furthermore, note that this does not cancel the asynchronous read or write +operation started by the read/alloc and write functions, so the page +invalidation functions must use:: + + bool fscache_check_page_write(struct fscache_cookie *cookie, + struct page *page); + +to see if a page is being written to the cache, and:: + + void fscache_wait_on_page_write(struct fscache_cookie *cookie, + struct page *page); + +to wait for it to finish if it is. 
+ + +When releasepage() is being implemented, a special FS-Cache function exists to +manage the heuristics of coping with vmscan trying to eject pages, which may +conflict with the cache trying to write pages to the cache (which may itself +need to allocate memory):: + + bool fscache_maybe_release_page(struct fscache_cookie *cookie, + struct page *page, + gfp_t gfp); + +This takes the netfs cookie, and the page and gfp arguments as supplied to +releasepage(). It will return false if the page cannot be released yet for +some reason and if it returns true, the page has been uncached and can now be +released. + +To make a page available for release, this function may wait for an outstanding +storage request to complete, or it may attempt to cancel the storage request - +in which case the page will not be stored in the cache this time. + + +Bulk Image Page Uncache +----------------------- + +A convenience routine is provided to perform an uncache on all the pages +attached to an inode. This assumes that the pages on the inode correspond on a +1:1 basis with the pages in the cache:: + + void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode); + +This takes the netfs cookie that the pages were cached with and the inode that +the pages are attached to. This function will wait for pages to finish being +written to the cache and for the cache to finish with the page generally. No +error is returned. + + +Index and Data File consistency +=============================== + +To find out whether auxiliary data for an object is up to date within the +cache, the following function can be called:: + + int fscache_check_consistency(struct fscache_cookie *cookie, + const void *aux_data); + +This will call back to the netfs to check whether the auxiliary data associated +with a cookie is correct; if aux_data is non-NULL, it will update the auxiliary +data buffer first. 
It returns 0 if it is and -ESTALE if it isn't; it may also +return -ENOMEM and -ERESTARTSYS. + +To request an update of the index data for an index or other object, the +following function should be called:: + + void fscache_update_cookie(struct fscache_cookie *cookie, + const void *aux_data); + +This function will update the cookie's auxiliary data buffer from aux_data if +that is non-NULL and then schedule this to be stored on disk. The update +method in the parent index definition will be called to transfer the data. + +Note that partial updates may happen automatically at other times, such as when +data blocks are added to a data file object. + + +Cookie Enablement +================= + +Cookies exist in one of two states: enabled and disabled. If a cookie is +disabled, it ignores all attempts to acquire child cookies; check, update or +invalidate its state; allocate, read or write backing pages - though it is +still possible to uncache pages and relinquish the cookie. + +The initial enablement state is set by fscache_acquire_cookie(), but the cookie +can be enabled or disabled later. To disable a cookie, call:: + + void fscache_disable_cookie(struct fscache_cookie *cookie, + const void *aux_data, + bool invalidate); + +If the cookie is not already disabled, this locks the cookie against other +enable and disable ops, marks the cookie as being disabled, discards or +invalidates any backing objects and waits for cessation of activity on any +associated object before unlocking the cookie. + +All possible failures are handled internally. The caller should consider +calling fscache_uncache_all_inode_pages() afterwards to make sure all page +markings are cleared up. 
+ +Cookies can be enabled or reenabled with:: + + void fscache_enable_cookie(struct fscache_cookie *cookie, + const void *aux_data, + loff_t object_size, + bool (*can_enable)(void *data), + void *data) + +If the cookie is not already enabled, this locks the cookie against other +enable and disable ops, invokes can_enable() and, if the cookie is not an index +cookie, will begin the procedure of acquiring backing objects. + +The optional can_enable() function is passed the data argument and returns a +ruling as to whether or not enablement should actually be permitted to begin. + +All possible failures are handled internally. The cookie will only be marked +as enabled if provisional backing objects are allocated. + +The object's data size is updated from object_size and is passed to the +->check_aux() function. + +In both cases, the cookie's auxiliary data buffer is updated from aux_data if +that is non-NULL inside the enablement lock before proceeding. + + +Miscellaneous Cookie operations +=============================== + +There are a number of operations that can be used to control cookies: + + * Cookie pinning:: + + int fscache_pin_cookie(struct fscache_cookie *cookie); + void fscache_unpin_cookie(struct fscache_cookie *cookie); + + These operations permit data cookies to be pinned into the cache and to + have the pinning removed. They are not permitted on index cookies. + + The pinning function will return 0 if successful, -ENOBUFS if the cookie + isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. + + * Data space reservation:: + + int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); + + This permits a netfs to request cache space be reserved to store up to the + given amount of a file. It is permitted to ask for more than the current + size of the file to allow for future file expansion. 
+ + If size is given as zero then the reservation will be cancelled. + + The function will return 0 if successful, -ENOBUFS if the cookie isn't + backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, + -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or + -EIO if there's any other problem. + + Note that this doesn't pin an object in a cache; it can still be culled to + make space if it's not in use. + + +Cookie Unregistration +===================== + +To get rid of a cookie, this function should be called:: + + void fscache_relinquish_cookie(struct fscache_cookie *cookie, + const void *aux_data, + bool retire); + +If retire is non-zero, then the object will be marked for recycling, and all +copies of it will be removed from all active caches in which it is present. +Not only that but all child objects will also be retired. + +If retire is zero, then the object may be available again when next the +acquisition function is called. Retirement here will overrule the pinning on a +cookie. + +The cookie's auxiliary data will be updated from aux_data if that is non-NULL +so that the cache can lazily update it on disk. + +One very important note - relinquish must NOT be called for a cookie unless all +the cookies for "child" indices, objects and pages have been relinquished +first. + + +Index Invalidation +================== + +There is no direct way to invalidate an index subtree. To do this, the caller +should relinquish and retire the cookie they have, and then acquire a new one. + + +Data File Invalidation +====================== + +Sometimes it will be necessary to invalidate an object that contains data. +Typically this will be necessary when the server tells the netfs of a foreign +change - at which point the netfs has to throw away all the state it had for an +inode and reload from the server. 
+ +To indicate that a cache object should be invalidated, the following function +can be called:: + + void fscache_invalidate(struct fscache_cookie *cookie); + +This can be called with spinlocks held as it defers the work to a thread pool. +All extant storage, retrieval and attribute change ops at this point are +cancelled and discarded. Some future operations will be rejected until the +cache has had a chance to insert a barrier in the operations queue. After +that, operations will be queued again behind the invalidation operation. + +The invalidation operation will perform an attribute change operation and an +auxiliary data update operation as it is very likely these will have changed. + +Using the following function, the netfs can wait for the invalidation operation +to have reached a point at which it can start submitting ordinary operations +once again:: + + void fscache_wait_on_invalidate(struct fscache_cookie *cookie); + + +FS-cache Specific Page Flag +=========================== + +FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is +given the alternative name PG_fscache. + +PG_fscache is used to indicate that the page is known by the cache, and that +the cache must be informed if the page is going to go away. It's an indication +to the netfs that the cache has an interest in this page, where an interest may +be a pointer to it, resources allocated or reserved for it, or I/O in progress +upon it. + +The netfs can use this information in methods such as releasepage() to +determine whether it needs to uncache a page or update it. + +Furthermore, if this bit is set, releasepage() and invalidatepage() operations +will be called on a page to get rid of it, even if PG_private is not set. This +allows caching to be attempted on a page before read_cache_pages() to be called +after fscache_read_or_alloc_pages() as the former will try and release pages it +was given under certain circumstances. 
+ +This bit does not overlap with such as PG_private. This means that FS-Cache +can be used with a filesystem that uses the block buffering code. + +There are a number of operations defined on this flag:: + + int PageFsCache(struct page *page); + void SetPageFsCache(struct page *page) + void ClearPageFsCache(struct page *page) + int TestSetPageFsCache(struct page *page) + int TestClearPageFsCache(struct page *page) + +These functions are bit test, bit set, bit clear, bit test and set and bit +test and clear operations on PG_fscache. diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt deleted file mode 100644 index ba968e8f5704..000000000000 --- a/Documentation/filesystems/caching/netfs-api.txt +++ /dev/null @@ -1,910 +0,0 @@ - =============================== - FS-CACHE NETWORK FILESYSTEM API - =============================== - -There's an API by which a network filesystem can make use of the FS-Cache -facilities. This is based around a number of principles: - - (1) Caches can store a number of different object types. There are two main - object types: indices and files. The first is a special type used by - FS-Cache to make finding objects faster and to make retiring of groups of - objects easier. - - (2) Every index, file or other object is represented by a cookie. This cookie - may or may not have anything associated with it, but the netfs doesn't - need to care. - - (3) Barring the top-level index (one entry per cached netfs), the index - hierarchy for each netfs is structured according the whim of the netfs. - -This API is declared in . 
- -This document contains the following sections: - - (1) Network filesystem definition - (2) Index definition - (3) Object definition - (4) Network filesystem (un)registration - (5) Cache tag lookup - (6) Index registration - (7) Data file registration - (8) Miscellaneous object registration - (9) Setting the data file size - (10) Page alloc/read/write - (11) Page uncaching - (12) Index and data file consistency - (13) Cookie enablement - (14) Miscellaneous cookie operations - (15) Cookie unregistration - (16) Index invalidation - (17) Data file invalidation - (18) FS-Cache specific page flags. - - -============================= -NETWORK FILESYSTEM DEFINITION -============================= - -FS-Cache needs a description of the network filesystem. This is specified -using a record of the following structure: - - struct fscache_netfs { - uint32_t version; - const char *name; - struct fscache_cookie *primary_index; - ... - }; - -This first two fields should be filled in before registration, and the third -will be filled in by the registration function; any other fields should just be -ignored and are for internal use only. - -The fields are: - - (1) The name of the netfs (used as the key in the toplevel index). - - (2) The version of the netfs (if the name matches but the version doesn't, the - entire in-cache hierarchy for this netfs will be scrapped and begun - afresh). - - (3) The cookie representing the primary index will be allocated according to - another parameter passed into the registration function. - -For example, kAFS (linux/fs/afs/) uses the following definitions to describe -itself: - - struct fscache_netfs afs_cache_netfs = { - .version = 0, - .name = "afs", - }; - - -================ -INDEX DEFINITION -================ - -Indices are used for two purposes: - - (1) To aid the finding of a file based on a series of keys (such as AFS's - "cell", "volume ID", "vnode ID"). 
- - (2) To make it easier to discard a subset of all the files cached based around - a particular key - for instance to mirror the removal of an AFS volume. - -However, since it's unlikely that any two netfs's are going to want to define -their index hierarchies in quite the same way, FS-Cache tries to impose as few -restraints as possible on how an index is structured and where it is placed in -the tree. The netfs can even mix indices and data files at the same level, but -it's not recommended. - -Each index entry consists of a key of indeterminate length plus some auxiliary -data, also of indeterminate length. - -There are some limits on indices: - - (1) Any index containing non-index objects should be restricted to a single - cache. Any such objects created within an index will be created in the - first cache only. The cache in which an index is created can be - controlled by cache tags (see below). - - (2) The entry data must be atomically journallable, so it is limited to about - 400 bytes at present. At least 400 bytes will be available. - - (3) The depth of the index tree should be judged with care as the search - function is recursive. Too many layers will run the kernel out of stack. - - -================= -OBJECT DEFINITION -================= - -To define an object, a structure of the following type should be filled out: - - struct fscache_cookie_def - { - uint8_t name[16]; - uint8_t type; - - struct fscache_cache_tag *(*select_cache)( - const void *parent_netfs_data, - const void *cookie_netfs_data); - - enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, - const void *data, - uint16_t datalen, - loff_t object_size); - - void (*get_context)(void *cookie_netfs_data, void *context); - - void (*put_context)(void *cookie_netfs_data, void *context); - - void (*mark_pages_cached)(void *cookie_netfs_data, - struct address_space *mapping, - struct pagevec *cached_pvec); - }; - -This has the following fields: - - (1) The type of the object [mandatory]. 
- - This is one of the following values: - - (*) FSCACHE_COOKIE_TYPE_INDEX - - This defines an index, which is a special FS-Cache type. - - (*) FSCACHE_COOKIE_TYPE_DATAFILE - - This defines an ordinary data file. - - (*) Any other value between 2 and 255 - - This defines an extraordinary object such as an XATTR. - - (2) The name of the object type (NUL terminated unless all 16 chars are used) - [optional]. - - (3) A function to select the cache in which to store an index [optional]. - - This function is invoked when an index needs to be instantiated in a cache - during the instantiation of a non-index object. Only the immediate index - parent for the non-index object will be queried. Any indices above that - in the hierarchy may be stored in multiple caches. This function does not - need to be supplied for any non-index object or any index that will only - have index children. - - If this function is not supplied or if it returns NULL then the first - cache in the parent's list will be chosen, or failing that, the first - cache in the master list. - - (4) A function to check the auxiliary data [optional]. - - This function will be called to check that a match found in the cache for - this object is valid. For instance with AFS it could check the auxiliary - data against the data version number returned by the server to determine - whether the index entry in a cache is still valid. - - If this function is absent, it will be assumed that matching objects in a - cache are always valid. - - The function is also passed the cache's idea of the object size and may - use this to manage coherency also. 
- - If present, the function should return one of the following values: - - (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is - (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update - (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted - - This function can also be used to extract data from the auxiliary data in - the cache and copy it into the netfs's structures. - - (5) A pair of functions to manage contexts for the completion callback - [optional]. - - The cache read/write functions are passed a context which is then passed - to the I/O completion callback function. To ensure this context remains - valid until after the I/O completion is called, two functions may be - provided: one to get an extra reference on the context, and one to drop a - reference to it. - - If the context is not used or is a type of object that won't go out of - scope, then these functions are not required. These functions are not - required for indices as indices may not contain data. These functions may - be called in interrupt context and so may not sleep. - - (6) A function to mark a page as retaining cache metadata [optional]. - - This is called by the cache to indicate that it is retaining in-memory - information for this page and that the netfs should uncache the page when - it has finished. This does not indicate whether there's data on the disk - or not. Note that several pages at once may be presented for marking. - - The PG_fscache bit is set on the pages before this function would be - called, so the function need not be provided if this is sufficient. - - This function is not required for indices as they're not permitted data. - - (7) A function to unmark all the pages retaining cache metadata [mandatory]. - - This is called by FS-Cache to indicate that a backing store is being - unbound from a cookie and that all the marks on the pages should be - cleared to prevent confusion. 
Note that the cache will have torn down all - its tracking information so that the pages don't need to be explicitly - uncached. - - This function is not required for indices as they're not permitted data. - - -=================================== -NETWORK FILESYSTEM (UN)REGISTRATION -=================================== - -The first step is to declare the network filesystem to the cache. This also -involves specifying the layout of the primary index (for AFS, this would be the -"cell" level). - -The registration function is: - - int fscache_register_netfs(struct fscache_netfs *netfs); - -It just takes a pointer to the netfs definition. It returns 0 or an error as -appropriate. - -For kAFS, registration is done as follows: - - ret = fscache_register_netfs(&afs_cache_netfs); - -The last step is, of course, unregistration: - - void fscache_unregister_netfs(struct fscache_netfs *netfs); - - -================ -CACHE TAG LOOKUP -================ - -FS-Cache permits the use of more than one cache. To permit particular index -subtrees to be bound to particular caches, the second step is to look up cache -representation tags. This step is optional; it can be left entirely up to -FS-Cache as to which cache should be used. The problem with doing that is that -FS-Cache will always pick the first cache that was registered. - -To get the representation for a named tag: - - struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); - -This takes a text string as the name and returns a representation of a tag. It -will never return an error. It may return a dummy tag, however, if it runs out -of memory; this will inhibit caching with this tag. - -Any representation so obtained must be released by passing it to this function: - - void fscache_release_cache_tag(struct fscache_cache_tag *tag); - -The tag will be retrieved by FS-Cache when it calls the object definition -operation select_cache(). 
- - -================== -INDEX REGISTRATION -================== - -The third step is to inform FS-Cache about part of an index hierarchy that can -be used to locate files. This is done by requesting a cookie for each index in -the path to the file: - - struct fscache_cookie * - fscache_acquire_cookie(struct fscache_cookie *parent, - const struct fscache_object_def *def, - const void *index_key, - size_t index_key_len, - const void *aux_data, - size_t aux_data_len, - void *netfs_data, - loff_t object_size, - bool enable); - -This function creates an index entry in the index represented by parent, -filling in the index entry by calling the operations pointed to by def. - -A unique key that represents the object within the parent must be pointed to by -index_key and is of length index_key_len. - -An optional blob of auxiliary data that is to be stored within the cache can be -pointed to with aux_data and should be of length aux_data_len. This would -typically be used for storing coherency data. - -The netfs may pass an arbitrary value in netfs_data and this will be presented -to it in the event of any calling back. This may also be used in tracing or -logging of messages. - -The cache tracks the size of the data attached to an object and this set to be -object_size. For indices, this should be 0. This value will be passed to the -->check_aux() callback. - -Note that this function never returns an error - all errors are handled -internally. It may, however, return NULL to indicate no cookie. It is quite -acceptable to pass this token back to this function as the parent to another -acquisition (or even to the relinquish cookie, read page and write page -functions - see below). - -Note also that no indices are actually created in a cache until a non-index -object needs to be created somewhere down the hierarchy. Furthermore, an index -may be created in several different caches independently at different times. 
-This is all handled transparently, and the netfs doesn't see any of it. - -A cookie will be created in the disabled state if enabled is false. A cookie -must be enabled to do anything with it. A disabled cookie can be enabled by -calling fscache_enable_cookie() (see below). - -For example, with AFS, a cell would be added to the primary index. This index -entry would have a dependent inode containing volume mappings within this cell: - - cell->cache = - fscache_acquire_cookie(afs_cache_netfs.primary_index, - &afs_cell_cache_index_def, - cell->name, strlen(cell->name), - NULL, 0, - cell, 0, true); - -And then a particular volume could be added to that index by ID, creating -another index for vnodes (AFS inode equivalents): - - volume->cache = - fscache_acquire_cookie(volume->cell->cache, - &afs_volume_cache_index_def, - &volume->vid, sizeof(volume->vid), - NULL, 0, - volume, 0, true); - - -====================== -DATA FILE REGISTRATION -====================== - -The fourth step is to request a data file be created in the cache. This is -identical to index cookie acquisition. The only difference is that the type in -the object definition should be something other than index type. - - vnode->cache = - fscache_acquire_cookie(volume->cache, - &afs_vnode_cache_object_def, - &key, sizeof(key), - &aux, sizeof(aux), - vnode, vnode->status.size, true); - - -================================= -MISCELLANEOUS OBJECT REGISTRATION -================================= - -An optional step is to request an object of miscellaneous type be created in -the cache. This is almost identical to index cookie acquisition. The only -difference is that the type in the object definition should be something other -than index type. While the parent object could be an index, it's more likely -it would be some other type of object such as a data file. 
- - xattr->cache = - fscache_acquire_cookie(vnode->cache, - &afs_xattr_cache_object_def, - &xattr->name, strlen(xattr->name), - NULL, 0, - xattr, strlen(xattr->val), true); - -Miscellaneous objects might be used to store extended attributes or directory -entries for example. - - -========================== -SETTING THE DATA FILE SIZE -========================== - -The fifth step is to set the physical attributes of the file, such as its size. -This doesn't automatically reserve any space in the cache, but permits the -cache to adjust its metadata for data tracking appropriately: - - int fscache_attr_changed(struct fscache_cookie *cookie); - -The cache will return -ENOBUFS if there is no backing cache or if there is no -space to allocate any extra metadata required in the cache. - -Note that attempts to read or write data pages in the cache over this size may -be rebuffed with -ENOBUFS. - -This operation schedules an attribute adjustment to happen asynchronously at -some point in the future, and as such, it may happen after the function returns -to the caller. The attribute adjustment excludes read and write operations. - - -===================== -PAGE ALLOC/READ/WRITE -===================== - -And the sixth step is to store and retrieve pages in the cache. There are -three functions that are used to do this. - -Note: - - (1) A page should not be re-read or re-allocated without uncaching it first. - - (2) A read or allocated page must be uncached when the netfs page is released - from the pagecache. - - (3) A page should only be written to the cache if previous read or allocated. - -This permits the cache to maintain its page tracking in proper order. 
- - -PAGE READ ---------- - -Firstly, the netfs should ask FS-Cache to examine the caches and read the -contents cached for a particular page of a particular file if present, or else -allocate space to store the contents if not: - - typedef - void (*fscache_rw_complete_t)(struct page *page, - void *context, - int error); - - int fscache_read_or_alloc_page(struct fscache_cookie *cookie, - struct page *page, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp); - -The cookie argument must specify a cookie for an object that isn't an index, -the page specified will have the data loaded into it (and is also used to -specify the page number), and the gfp argument is used to control how any -memory allocations made are satisfied. - -If the cookie indicates the inode is not cached: - - (1) The function will return -ENOBUFS. - -Else if there's a copy of the page resident in the cache: - - (1) The mark_pages_cached() cookie operation will be called on that page. - - (2) The function will submit a request to read the data from the cache's - backing device directly into the page specified. - - (3) The function will return 0. - - (4) When the read is complete, end_io_func() will be invoked with: - - (*) The netfs data supplied when the cookie was created. - - (*) The page descriptor. - - (*) The context argument passed to the above function. This will be - maintained with the get_context/put_context functions mentioned above. - - (*) An argument that's 0 on success or negative for an error code. - - If an error occurs, it should be assumed that the page contains no usable - data. fscache_readpages_cancel() may need to be called. - - end_io_func() will be called in process context if the read is results in - an error, but it might be called in interrupt context if the read is - successful. 
- -Otherwise, if there's not a copy available in cache, but the cache may be able -to store the page: - - (1) The mark_pages_cached() cookie operation will be called on that page. - - (2) A block may be reserved in the cache and attached to the object at the - appropriate place. - - (3) The function will return -ENODATA. - -This function may also return -ENOMEM or -EINTR, in which case it won't have -read any data from the cache. - - -PAGE ALLOCATE -------------- - -Alternatively, if there's not expected to be any data in the cache for a page -because the file has been extended, a block can simply be allocated instead: - - int fscache_alloc_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp); - -This is similar to the fscache_read_or_alloc_page() function, except that it -never reads from the cache. It will return 0 if a block has been allocated, -rather than -ENODATA as the other would. One or the other must be performed -before writing to the cache. - -The mark_pages_cached() cookie operation will be called on the page if -successful. - - -PAGE WRITE ----------- - -Secondly, if the netfs changes the contents of the page (either due to an -initial download or if a user performs a write), then the page should be -written back to the cache: - - int fscache_write_page(struct fscache_cookie *cookie, - struct page *page, - loff_t object_size, - gfp_t gfp); - -The cookie argument must specify a data file cookie, the page specified should -contain the data to be written (and is also used to specify the page number), -object_size is the revised size of the object and the gfp argument is used to -control how any memory allocations made are satisfied. - -The page must have first been read or allocated successfully and must not have -been uncached before writing is performed. - -If the cookie indicates the inode is not cached then: - - (1) The function will return -ENOBUFS. 
- -Else if space can be allocated in the cache to hold this page: - - (1) PG_fscache_write will be set on the page. - - (2) The function will submit a request to write the data to cache's backing - device directly from the page specified. - - (3) The function will return 0. - - (4) When the write is complete PG_fscache_write is cleared on the page and - anyone waiting for that bit will be woken up. - -Else if there's no space available in the cache, -ENOBUFS will be returned. It -is also possible for the PG_fscache_write bit to be cleared when no write took -place if unforeseen circumstances arose (such as a disk error). - -Writing takes place asynchronously. - - -MULTIPLE PAGE READ ------------------- - -A facility is provided to read several pages at once, as requested by the -readpages() address space operation: - - int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, - struct address_space *mapping, - struct list_head *pages, - int *nr_pages, - fscache_rw_complete_t end_io_func, - void *context, - gfp_t gfp); - -This works in a similar way to fscache_read_or_alloc_page(), except: - - (1) Any page it can retrieve data for is removed from pages and nr_pages and - dispatched for reading to the disk. Reads of adjacent pages on disk may - be merged for greater efficiency. - - (2) The mark_pages_cached() cookie operation will be called on several pages - at once if they're being read or allocated. - - (3) If there was an general error, then that error will be returned. - - Else if some pages couldn't be allocated or read, then -ENOBUFS will be - returned. - - Else if some pages couldn't be read but were allocated, then -ENODATA will - be returned. - - Otherwise, if all pages had reads dispatched, then 0 will be returned, the - list will be empty and *nr_pages will be 0. - - (4) end_io_func will be called once for each page being read as the reads - complete. 
It will be called in process context if error != 0, but it may - be called in interrupt context if there is no error. - -Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude -some of the pages being read and some being allocated. Those pages will have -been marked appropriately and will need uncaching. - - -CANCELLATION OF UNREAD PAGES ----------------------------- - -If one or more pages are passed to fscache_read_or_alloc_pages() but not then -read from the cache and also not read from the underlying filesystem then -those pages will need to have any marks and reservations removed. This can be -done by calling: - - void fscache_readpages_cancel(struct fscache_cookie *cookie, - struct list_head *pages); - -prior to returning to the caller. The cookie argument should be as passed to -fscache_read_or_alloc_pages(). Every page in the pages list will be examined -and any that have PG_fscache set will be uncached. - - -============== -PAGE UNCACHING -============== - -To uncache a page, this function should be called: - - void fscache_uncache_page(struct fscache_cookie *cookie, - struct page *page); - -This function permits the cache to release any in-memory representation it -might be holding for this netfs page. This function must be called once for -each page on which the read or write page functions above have been called to -make sure the cache's in-memory tracking information gets torn down. - -Note that pages can't be explicitly deleted from the a data file. The whole -data file must be retired (see the relinquish cookie function below). 
- -Furthermore, note that this does not cancel the asynchronous read or write -operation started by the read/alloc and write functions, so the page -invalidation functions must use: - - bool fscache_check_page_write(struct fscache_cookie *cookie, - struct page *page); - -to see if a page is being written to the cache, and: - - void fscache_wait_on_page_write(struct fscache_cookie *cookie, - struct page *page); - -to wait for it to finish if it is. - - -When releasepage() is being implemented, a special FS-Cache function exists to -manage the heuristics of coping with vmscan trying to eject pages, which may -conflict with the cache trying to write pages to the cache (which may itself -need to allocate memory): - - bool fscache_maybe_release_page(struct fscache_cookie *cookie, - struct page *page, - gfp_t gfp); - -This takes the netfs cookie, and the page and gfp arguments as supplied to -releasepage(). It will return false if the page cannot be released yet for -some reason and if it returns true, the page has been uncached and can now be -released. - -To make a page available for release, this function may wait for an outstanding -storage request to complete, or it may attempt to cancel the storage request - -in which case the page will not be stored in the cache this time. - - -BULK INODE PAGE UNCACHE ------------------------ - -A convenience routine is provided to perform an uncache on all the pages -attached to an inode. This assumes that the pages on the inode correspond on a -1:1 basis with the pages in the cache. - - void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, - struct inode *inode); - -This takes the netfs cookie that the pages were cached with and the inode that -the pages are attached to. This function will wait for pages to finish being -written to the cache and for the cache to finish with the page generally. No -error is returned. 
- - -=============================== -INDEX AND DATA FILE CONSISTENCY -=============================== - -To find out whether auxiliary data for an object is up to date within the -cache, the following function can be called: - - int fscache_check_consistency(struct fscache_cookie *cookie, - const void *aux_data); - -This will call back to the netfs to check whether the auxiliary data associated -with a cookie is correct; if aux_data is non-NULL, it will update the auxiliary -data buffer first. It returns 0 if it is and -ESTALE if it isn't; it may also -return -ENOMEM and -ERESTARTSYS. - -To request an update of the index data for an index or other object, the -following function should be called: - - void fscache_update_cookie(struct fscache_cookie *cookie, - const void *aux_data); - -This function will update the cookie's auxiliary data buffer from aux_data if -that is non-NULL and then schedule this to be stored on disk. The update -method in the parent index definition will be called to transfer the data. - -Note that partial updates may happen automatically at other times, such as when -data blocks are added to a data file object. - - -================= -COOKIE ENABLEMENT -================= - -Cookies exist in one of two states: enabled and disabled. If a cookie is -disabled, it ignores all attempts to acquire child cookies; check, update or -invalidate its state; allocate, read or write backing pages - though it is -still possible to uncache pages and relinquish the cookie. - -The initial enablement state is set by fscache_acquire_cookie(), but the cookie -can be enabled or disabled later. 
To disable a cookie, call: - - void fscache_disable_cookie(struct fscache_cookie *cookie, - const void *aux_data, - bool invalidate); - -If the cookie is not already disabled, this locks the cookie against other -enable and disable ops, marks the cookie as being disabled, discards or -invalidates any backing objects and waits for cessation of activity on any -associated object before unlocking the cookie. - -All possible failures are handled internally. The caller should consider -calling fscache_uncache_all_inode_pages() afterwards to make sure all page -markings are cleared up. - -Cookies can be enabled or reenabled with: - - void fscache_enable_cookie(struct fscache_cookie *cookie, - const void *aux_data, - loff_t object_size, - bool (*can_enable)(void *data), - void *data) - -If the cookie is not already enabled, this locks the cookie against other -enable and disable ops, invokes can_enable() and, if the cookie is not an index -cookie, will begin the procedure of acquiring backing objects. - -The optional can_enable() function is passed the data argument and returns a -ruling as to whether or not enablement should actually be permitted to begin. - -All possible failures are handled internally. The cookie will only be marked -as enabled if provisional backing objects are allocated. - -The object's data size is updated from object_size and is passed to the -->check_aux() function. - -In both cases, the cookie's auxiliary data buffer is updated from aux_data if -that is non-NULL inside the enablement lock before proceeding. - - -=============================== -MISCELLANEOUS COOKIE OPERATIONS -=============================== - -There are a number of operations that can be used to control cookies: - - (*) Cookie pinning: - - int fscache_pin_cookie(struct fscache_cookie *cookie); - void fscache_unpin_cookie(struct fscache_cookie *cookie); - - These operations permit data cookies to be pinned into the cache and to - have the pinning removed. 
They are not permitted on index cookies. - - The pinning function will return 0 if successful, -ENOBUFS if the cookie - isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, - -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or - -EIO if there's any other problem. - - (*) Data space reservation: - - int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); - - This permits a netfs to request cache space be reserved to store up to the - given amount of a file. It is permitted to ask for more than the current - size of the file to allow for future file expansion. - - If size is given as zero then the reservation will be cancelled. - - The function will return 0 if successful, -ENOBUFS if the cookie isn't - backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, - -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or - -EIO if there's any other problem. - - Note that this doesn't pin an object in a cache; it can still be culled to - make space if it's not in use. - - -===================== -COOKIE UNREGISTRATION -===================== - -To get rid of a cookie, this function should be called. - - void fscache_relinquish_cookie(struct fscache_cookie *cookie, - const void *aux_data, - bool retire); - -If retire is non-zero, then the object will be marked for recycling, and all -copies of it will be removed from all active caches in which it is present. -Not only that but all child objects will also be retired. - -If retire is zero, then the object may be available again when next the -acquisition function is called. Retirement here will overrule the pinning on a -cookie. - -The cookie's auxiliary data will be updated from aux_data if that is non-NULL -so that the cache can lazily update it on disk. - -One very important note - relinquish must NOT be called for a cookie unless all -the cookies for "child" indices, objects and pages have been relinquished -first. 
- - -================== -INDEX INVALIDATION -================== - -There is no direct way to invalidate an index subtree. To do this, the caller -should relinquish and retire the cookie they have, and then acquire a new one. - - -====================== -DATA FILE INVALIDATION -====================== - -Sometimes it will be necessary to invalidate an object that contains data. -Typically this will be necessary when the server tells the netfs of a foreign -change - at which point the netfs has to throw away all the state it had for an -inode and reload from the server. - -To indicate that a cache object should be invalidated, the following function -can be called: - - void fscache_invalidate(struct fscache_cookie *cookie); - -This can be called with spinlocks held as it defers the work to a thread pool. -All extant storage, retrieval and attribute change ops at this point are -cancelled and discarded. Some future operations will be rejected until the -cache has had a chance to insert a barrier in the operations queue. After -that, operations will be queued again behind the invalidation operation. - -The invalidation operation will perform an attribute change operation and an -auxiliary data update operation as it is very likely these will have changed. - -Using the following function, the netfs can wait for the invalidation operation -to have reached a point at which it can start submitting ordinary operations -once again: - - void fscache_wait_on_invalidate(struct fscache_cookie *cookie); - - -=========================== -FS-CACHE SPECIFIC PAGE FLAG -=========================== - -FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is -given the alternative name PG_fscache. - -PG_fscache is used to indicate that the page is known by the cache, and that -the cache must be informed if the page is going to go away. 
It's an indication -to the netfs that the cache has an interest in this page, where an interest may -be a pointer to it, resources allocated or reserved for it, or I/O in progress -upon it. - -The netfs can use this information in methods such as releasepage() to -determine whether it needs to uncache a page or update it. - -Furthermore, if this bit is set, releasepage() and invalidatepage() operations -will be called on a page to get rid of it, even if PG_private is not set. This -allows caching to be attempted on a page before read_cache_pages() to be called -after fscache_read_or_alloc_pages() as the former will try and release pages it -was given under certain circumstances. - -This bit does not overlap with such as PG_private. This means that FS-Cache -can be used with a filesystem that uses the block buffering code. - -There are a number of operations defined on this flag: - - int PageFsCache(struct page *page); - void SetPageFsCache(struct page *page) - void ClearPageFsCache(struct page *page) - int TestSetPageFsCache(struct page *page) - int TestClearPageFsCache(struct page *page) - -These functions are bit test, bit set, bit clear, bit test and set and bit -test and clear operations on PG_fscache. diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 0ce39658a620..751bc5b1cddf 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -4,7 +4,7 @@ * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * See Documentation/filesystems/caching/netfs-api.txt for more information on + * See Documentation/filesystems/caching/netfs-api.rst for more information on * the netfs API. */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ad044c0cb1f3..a1c928fe98e7 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -6,7 +6,7 @@ * * NOTE!!! 
See: * - * Documentation/filesystems/caching/netfs-api.txt + * Documentation/filesystems/caching/netfs-api.rst * * for a description of the network filesystem interface declared here. */ @@ -233,7 +233,7 @@ extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_ * * Register a filesystem as desiring caching services if they're available. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -253,7 +253,7 @@ int fscache_register_netfs(struct fscache_netfs *netfs) * Indicate that a filesystem no longer desires caching services for the * moment. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -270,7 +270,7 @@ void fscache_unregister_netfs(struct fscache_netfs *netfs) * Acquire a specific cache referral tag that can be used to select a specific * cache in which to cache an index. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -288,7 +288,7 @@ struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) * * Release a reference to a cache referral tag previously looked up. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -315,7 +315,7 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag) * that can be used to locate files. This is done by requesting a cookie for * each index in the path to the file. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. 
*/ static inline @@ -351,7 +351,7 @@ struct fscache_cookie *fscache_acquire_cookie( * provided to update the auxiliary data in the cache before the object is * disconnected. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -394,7 +394,7 @@ int fscache_check_consistency(struct fscache_cookie *cookie, * cookie. The auxiliary data on the cookie will be updated first if @aux_data * is set. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -410,7 +410,7 @@ void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data) * * Permit data-storage cache objects to be pinned in the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -425,7 +425,7 @@ int fscache_pin_cookie(struct fscache_cookie *cookie) * * Permit data-storage cache objects to be unpinned from the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -441,7 +441,7 @@ void fscache_unpin_cookie(struct fscache_cookie *cookie) * changed. This includes the data size. These attributes will be obtained * through the get_attr() cookie definition op. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -463,7 +463,7 @@ int fscache_attr_changed(struct fscache_cookie *cookie) * * This can be called with spinlocks held. 
* - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -479,7 +479,7 @@ void fscache_invalidate(struct fscache_cookie *cookie) * * Wait for the invalidation of an object to complete. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -498,7 +498,7 @@ void fscache_wait_on_invalidate(struct fscache_cookie *cookie) * cookie so that a write to that object within the space can always be * honoured. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -533,7 +533,7 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) * Else, if the page is unbacked, -ENODATA is returned and a block may have * been allocated in the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -582,7 +582,7 @@ int fscache_read_or_alloc_page(struct fscache_cookie *cookie, * regard to different pages, the return values are prioritised in that order. * Any pages submitted for reading are removed from the pages list. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -617,7 +617,7 @@ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, * Else, a block will be allocated if one wasn't already, and 0 will be * returned * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. 
*/ static inline @@ -667,7 +667,7 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie, * be cleared at the completion of the write to indicate the success or failure * of the operation. Note that the completion may happen before the return. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -693,7 +693,7 @@ int fscache_write_page(struct fscache_cookie *cookie, * Note that this cannot cancel any outstanding I/O operations between this * page and the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -711,7 +711,7 @@ void fscache_uncache_page(struct fscache_cookie *cookie, * * Ask the cache if a page is being written to the cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline @@ -731,7 +731,7 @@ bool fscache_check_page_write(struct fscache_cookie *cookie, * Ask the cache to wake us up when a page is no longer being written to the * cache. * - * See Documentation/filesystems/caching/netfs-api.txt for a complete + * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline -- cgit v1.2.3 From 09eac7c53570038d1069d711001b478f285c30cf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:56 +0200 Subject: docs: filesystems: caching/operations.txt: convert it to ReST - Add a SPDX header; - Adjust document and section titles; - Comment out text ToC for html/pdf output; - Mark literal blocks as such; - Add it to filesystems/caching/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/97e71cc598a4f61df484ebda3ec06b63530ceb62.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/caching/index.rst | 1 + Documentation/filesystems/caching/operations.rst | 210 ++++++++++++++++++++++ Documentation/filesystems/caching/operations.txt | 213 ----------------------- fs/fscache/operation.c | 2 +- 4 files changed, 212 insertions(+), 214 deletions(-) create mode 100644 Documentation/filesystems/caching/operations.rst delete mode 100644 Documentation/filesystems/caching/operations.txt diff --git a/Documentation/filesystems/caching/index.rst b/Documentation/filesystems/caching/index.rst index d0651db450fb..75492b7c8ea0 100644 --- a/Documentation/filesystems/caching/index.rst +++ b/Documentation/filesystems/caching/index.rst @@ -9,3 +9,4 @@ Filesystem Caching fscache object netfs-api + operations diff --git a/Documentation/filesystems/caching/operations.rst b/Documentation/filesystems/caching/operations.rst new file mode 100644 index 000000000000..f7ddcc028939 --- /dev/null +++ b/Documentation/filesystems/caching/operations.rst @@ -0,0 +1,210 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +Asynchronous Operations Handling +================================ + +By: David Howells + +.. Contents: + + (*) Overview. + + (*) Operation record initialisation. + + (*) Parameters. + + (*) Procedure. + + (*) Asynchronous callback. + + +Overview +======== + +FS-Cache has an asynchronous operations handling facility that it uses for its +data storage and retrieval routines. Its operations are represented by +fscache_operation structs, though these are usually embedded into some other +structure. + +This facility is available to and expected to be used by the cache backends, +and FS-Cache will create operations and pass them off to the appropriate cache +backend for completion. + +To make use of this facility, <linux/fscache-cache.h> should be #included. 
+ + +Operation Record Initialisation +=============================== + +An operation is recorded in an fscache_operation struct:: + + struct fscache_operation { + union { + struct work_struct fast_work; + struct slow_work slow_work; + }; + unsigned long flags; + fscache_operation_processor_t processor; + ... + }; + +Someone wanting to issue an operation should allocate something with this +struct embedded in it. They should initialise it by calling:: + + void fscache_operation_init(struct fscache_operation *op, + fscache_operation_release_t release); + +with the operation to be initialised and the release function to use. + +The op->flags parameter should be set to indicate the CPU time provision and +the exclusivity (see the Parameters section). + +The op->fast_work, op->slow_work and op->processor flags should be set as +appropriate for the CPU time provision (see the Parameters section). + +FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the +operation and waited for afterwards. + + +Parameters +========== + +There are a number of parameters that can be set in the operation record's flag +parameter. There are three options for the provision of CPU time in these +operations: + + (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread + may decide it wants to handle an operation itself without deferring it to + another thread. + + This is, for example, used in read operations for calling readpages() on + the backing filesystem in CacheFiles. Although readpages() does an + asynchronous data fetch, the determination of whether pages exist is done + synchronously - and the netfs does not proceed until this has been + determined. 
+ + If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags + before submitting the operation, and the operating thread must wait for it + to be cleared before proceeding:: + + wait_on_bit(&op->flags, FSCACHE_OP_WAITING, + TASK_UNINTERRUPTIBLE); + + + (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it + will be given to keventd to process. Such an operation is not permitted + to sleep on I/O. + + This is, for example, used by CacheFiles to copy data from a backing fs + page to a netfs page after the backing fs has read the page in. + + If this option is used, op->fast_work and op->processor must be + initialised before submitting the operation:: + + INIT_WORK(&op->fast_work, do_some_work); + + + (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it + will be given to the slow work facility to process. Such an operation is + permitted to sleep on I/O. + + This is, for example, used by FS-Cache to handle background writes of + pages that have just been fetched from a remote server. + + If this option is used, op->slow_work and op->processor must be + initialised before submitting the operation:: + + fscache_operation_init_slow(op, processor) + + +Furthermore, operations may be one of two types: + + (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in + conjunction with any other operation on the object being operated upon. + + An example of this is the attribute change operation, in which the file + being written to may need truncation. + + (2) Shareable. Operations of this type may be running simultaneously. It's + up to the operation implementation to prevent interference between other + operations running at the same time. + + +Procedure +========= + +Operations are used through the following procedure: + + (1) The submitting thread must allocate the operation and initialise it + itself. 
Normally this would be part of a more specific structure with the + generic op embedded within. + + (2) The submitting thread must then submit the operation for processing using + one of the following two functions:: + + int fscache_submit_op(struct fscache_object *object, + struct fscache_operation *op); + + int fscache_submit_exclusive_op(struct fscache_object *object, + struct fscache_operation *op); + + The first function should be used to submit non-exclusive ops and the + second to submit exclusive ones. The caller must still set the + FSCACHE_OP_EXCLUSIVE flag. + + If successful, both functions will assign the operation to the specified + object and return 0. -ENOBUFS will be returned if the object specified is + permanently unavailable. + + The operation manager will defer operations on an object that is still + undergoing lookup or creation. The operation will also be deferred if an + operation of conflicting exclusivity is in progress on the object. + + If the operation is asynchronous, the manager will retain a reference to + it, so the caller should put their reference to it by passing it to:: + + void fscache_put_operation(struct fscache_operation *op); + + (3) If the submitting thread wants to do the work itself, and has marked the + operation with FSCACHE_OP_MYTHREAD, then it should monitor + FSCACHE_OP_WAITING as described above and check the state of the object if + necessary (the object might have died while the thread was waiting). + + When it has finished doing its processing, it should call + fscache_op_complete() and fscache_put_operation() on it. + + (4) The operation holds an effective lock upon the object, preventing other + exclusive ops conflicting until it is released. 
The operation can be + enqueued for further immediate asynchronous processing by adjusting the + CPU time provisioning option if necessary, eg:: + + op->flags &= ~FSCACHE_OP_TYPE; + op->flags |= ~FSCACHE_OP_FAST; + + and calling:: + + void fscache_enqueue_operation(struct fscache_operation *op) + + This can be used to allow other things to have use of the worker thread + pools. + + +Asynchronous Callback +===================== + +When used in asynchronous mode, the worker thread pool will invoke the +processor method with a pointer to the operation. This should then get at the +container struct by using container_of():: + + static void fscache_write_op(struct fscache_operation *_op) + { + struct fscache_storage *op = + container_of(_op, struct fscache_storage, op); + ... + } + +The caller holds a reference on the operation, and will invoke +fscache_put_operation() when the processor function returns. The processor +function is at liberty to call fscache_enqueue_operation() or to take extra +references. diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt deleted file mode 100644 index d8976c434718..000000000000 --- a/Documentation/filesystems/caching/operations.txt +++ /dev/null @@ -1,213 +0,0 @@ - ================================ - ASYNCHRONOUS OPERATIONS HANDLING - ================================ - -By: David Howells - -Contents: - - (*) Overview. - - (*) Operation record initialisation. - - (*) Parameters. - - (*) Procedure. - - (*) Asynchronous callback. - - -======== -OVERVIEW -======== - -FS-Cache has an asynchronous operations handling facility that it uses for its -data storage and retrieval routines. Its operations are represented by -fscache_operation structs, though these are usually embedded into some other -structure. 
- -This facility is available to and expected to be be used by the cache backends, -and FS-Cache will create operations and pass them off to the appropriate cache -backend for completion. - -To make use of this facility, should be #included. - - -=============================== -OPERATION RECORD INITIALISATION -=============================== - -An operation is recorded in an fscache_operation struct: - - struct fscache_operation { - union { - struct work_struct fast_work; - struct slow_work slow_work; - }; - unsigned long flags; - fscache_operation_processor_t processor; - ... - }; - -Someone wanting to issue an operation should allocate something with this -struct embedded in it. They should initialise it by calling: - - void fscache_operation_init(struct fscache_operation *op, - fscache_operation_release_t release); - -with the operation to be initialised and the release function to use. - -The op->flags parameter should be set to indicate the CPU time provision and -the exclusivity (see the Parameters section). - -The op->fast_work, op->slow_work and op->processor flags should be set as -appropriate for the CPU time provision (see the Parameters section). - -FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the -operation and waited for afterwards. - - -========== -PARAMETERS -========== - -There are a number of parameters that can be set in the operation record's flag -parameter. There are three options for the provision of CPU time in these -operations: - - (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread - may decide it wants to handle an operation itself without deferring it to - another thread. - - This is, for example, used in read operations for calling readpages() on - the backing filesystem in CacheFiles. Although readpages() does an - asynchronous data fetch, the determination of whether pages exist is done - synchronously - and the netfs does not proceed until this has been - determined. 
- - If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags - before submitting the operation, and the operating thread must wait for it - to be cleared before proceeding: - - wait_on_bit(&op->flags, FSCACHE_OP_WAITING, - TASK_UNINTERRUPTIBLE); - - - (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it - will be given to keventd to process. Such an operation is not permitted - to sleep on I/O. - - This is, for example, used by CacheFiles to copy data from a backing fs - page to a netfs page after the backing fs has read the page in. - - If this option is used, op->fast_work and op->processor must be - initialised before submitting the operation: - - INIT_WORK(&op->fast_work, do_some_work); - - - (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it - will be given to the slow work facility to process. Such an operation is - permitted to sleep on I/O. - - This is, for example, used by FS-Cache to handle background writes of - pages that have just been fetched from a remote server. - - If this option is used, op->slow_work and op->processor must be - initialised before submitting the operation: - - fscache_operation_init_slow(op, processor) - - -Furthermore, operations may be one of two types: - - (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in - conjunction with any other operation on the object being operated upon. - - An example of this is the attribute change operation, in which the file - being written to may need truncation. - - (2) Shareable. Operations of this type may be running simultaneously. It's - up to the operation implementation to prevent interference between other - operations running at the same time. - - -========= -PROCEDURE -========= - -Operations are used through the following procedure: - - (1) The submitting thread must allocate the operation and initialise it - itself. 
Normally this would be part of a more specific structure with the - generic op embedded within. - - (2) The submitting thread must then submit the operation for processing using - one of the following two functions: - - int fscache_submit_op(struct fscache_object *object, - struct fscache_operation *op); - - int fscache_submit_exclusive_op(struct fscache_object *object, - struct fscache_operation *op); - - The first function should be used to submit non-exclusive ops and the - second to submit exclusive ones. The caller must still set the - FSCACHE_OP_EXCLUSIVE flag. - - If successful, both functions will assign the operation to the specified - object and return 0. -ENOBUFS will be returned if the object specified is - permanently unavailable. - - The operation manager will defer operations on an object that is still - undergoing lookup or creation. The operation will also be deferred if an - operation of conflicting exclusivity is in progress on the object. - - If the operation is asynchronous, the manager will retain a reference to - it, so the caller should put their reference to it by passing it to: - - void fscache_put_operation(struct fscache_operation *op); - - (3) If the submitting thread wants to do the work itself, and has marked the - operation with FSCACHE_OP_MYTHREAD, then it should monitor - FSCACHE_OP_WAITING as described above and check the state of the object if - necessary (the object might have died while the thread was waiting). - - When it has finished doing its processing, it should call - fscache_op_complete() and fscache_put_operation() on it. - - (4) The operation holds an effective lock upon the object, preventing other - exclusive ops conflicting until it is released. 
The operation can be - enqueued for further immediate asynchronous processing by adjusting the - CPU time provisioning option if necessary, eg: - - op->flags &= ~FSCACHE_OP_TYPE; - op->flags |= ~FSCACHE_OP_FAST; - - and calling: - - void fscache_enqueue_operation(struct fscache_operation *op) - - This can be used to allow other things to have use of the worker thread - pools. - - -===================== -ASYNCHRONOUS CALLBACK -===================== - -When used in asynchronous mode, the worker thread pool will invoke the -processor method with a pointer to the operation. This should then get at the -container struct by using container_of(): - - static void fscache_write_op(struct fscache_operation *_op) - { - struct fscache_storage *op = - container_of(_op, struct fscache_storage, op); - ... - } - -The caller holds a reference on the operation, and will invoke -fscache_put_operation() when the processor function returns. The processor -function is at liberty to call fscache_enqueue_operation() or to take extra -references. diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 1a22a55f75a0..4a5651d4904e 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -4,7 +4,7 @@ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * - * See Documentation/filesystems/caching/operations.txt + * See Documentation/filesystems/caching/operations.rst */ #define FSCACHE_DEBUG_LEVEL OPERATION -- cgit v1.2.3 From d74802ade7de6b6771ac87f7104f8b587bba37fa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:57 +0200 Subject: docs: filesystems: caching/cachefiles.txt: convert to ReST - Add a SPDX header; - Adjust document title; - Mark literal blocks as such; - Add table markups; - Comment out text ToC for html/pdf output; - Add lists markups; - Add it to filesystems/caching/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/eec0cfc268e8dca348f760224685100c9c2caba6.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/caching/cachefiles.rst | 484 ++++++++++++++++++++++ Documentation/filesystems/caching/cachefiles.txt | 501 ----------------------- Documentation/filesystems/caching/index.rst | 1 + MAINTAINERS | 2 +- fs/cachefiles/Kconfig | 4 +- 5 files changed, 488 insertions(+), 504 deletions(-) create mode 100644 Documentation/filesystems/caching/cachefiles.rst delete mode 100644 Documentation/filesystems/caching/cachefiles.txt diff --git a/Documentation/filesystems/caching/cachefiles.rst b/Documentation/filesystems/caching/cachefiles.rst new file mode 100644 index 000000000000..65d3db476765 --- /dev/null +++ b/Documentation/filesystems/caching/cachefiles.rst @@ -0,0 +1,484 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================== +CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM +=============================================== + +.. Contents: + + (*) Overview. + + (*) Requirements. + + (*) Configuration. + + (*) Starting the cache. + + (*) Things to avoid. + + (*) Cache culling. + + (*) Cache structure. + + (*) Security model and SELinux. + + (*) A note on security. + + (*) Statistical information. + + (*) Debugging. + + + +Overview +======== + +CacheFiles is a caching backend that's meant to use as a cache a directory on +an already mounted filesystem of a local type (such as Ext3). + +CacheFiles uses a userspace daemon to do some of the cache management - such as +reaping stale nodes and culling. This is called cachefilesd and lives in +/sbin. + +The filesystem and data integrity of the cache are only as good as those of the +filesystem providing the backing services. Note that CacheFiles does not +attempt to journal anything since the journalling interfaces of the various +filesystems are very specific in nature. 
+ +CacheFiles creates a misc character device - "/dev/cachefiles" - that is used +to communicate with the daemon. Only one thing may have this open at once, +and while it is open, a cache is at least partially in existence. The daemon +opens this and sends commands down it to control the cache. + +CacheFiles is currently limited to a single cache. + +CacheFiles attempts to maintain at least a certain percentage of free space on +the filesystem, shrinking the cache by culling the objects it contains to make +space if necessary - see the "Cache Culling" section. This means it can be +placed on the same medium as a live set of data, and will expand to make use of +spare space and automatically contract when the set of data requires more +space. + + + +Requirements +============ + +The use of CacheFiles and its daemon requires the following features to be +available in the system and in the cache filesystem: + + - dnotify. + + - extended attributes (xattrs). + + - openat() and friends. + + - bmap() support on files in the filesystem (FIBMAP ioctl). + + - The use of bmap() to detect a partial page at the end of the file. + +It is strongly recommended that the "dir_index" option is enabled on Ext3 +filesystems being used as a cache. + + +Configuration +============= + +The cache is configured by a script in /etc/cachefilesd.conf. These commands +set up cache ready for use. The following script commands are available: + + brun %, bcull %, bstop %, frun %, fcull %, fstop % + Configure the culling limits. Optional. See the section on culling + The defaults are 7% (run), 5% (cull) and 1% (stop) respectively. + + The commands beginning with a 'b' are file space (block) limits, those + beginning with an 'f' are file count limits. + + dir + Specify the directory containing the root of the cache. Mandatory. + + tag + Specify a tag to FS-Cache to use in distinguishing multiple caches. + Optional. The default is "CacheFiles". 
+ + debug + Specify a numeric bitmask to control debugging in the kernel module. + Optional. The default is zero (all off). The following values can be + OR'd into the mask to collect various information: + + == ================================================= + 1 Turn on trace of function entry (_enter() macros) + 2 Turn on trace of function exit (_leave() macros) + 4 Turn on trace of internal debug points (_debug()) + == ================================================= + + This mask can also be set through sysfs, eg:: + + echo 5 >/sys/modules/cachefiles/parameters/debug + + +Starting the Cache +================== + +The cache is started by running the daemon. The daemon opens the cache device, +configures the cache and tells it to begin caching. At that point the cache +binds to fscache and the cache becomes live. + +The daemon is run as follows:: + + /sbin/cachefilesd [-d]* [-s] [-n] [-f ] + +The flags are: + + ``-d`` + Increase the debugging level. This can be specified multiple times and + is cumulative with itself. + + ``-s`` + Send messages to stderr instead of syslog. + + ``-n`` + Don't daemonise and go into background. + + ``-f `` + Use an alternative configuration file rather than the default one. + + +Things to Avoid +=============== + +Do not mount other things within the cache as this will cause problems. The +kernel module contains its own very cut-down path walking facility that ignores +mountpoints, but the daemon can't avoid them. + +Do not create, rename or unlink files and directories in the cache while the +cache is active, as this may cause the state to become uncertain. + +Renaming files in the cache might make objects appear to be other objects (the +filename is part of the lookup key). + +Do not change or remove the extended attributes attached to cache files by the +cache as this will cause the cache state management to get confused. + +Do not create files or directories in the cache, lest the cache get confused or +serve incorrect data. 
+ +Do not chmod files in the cache. The module creates things with minimal +permissions to prevent random users being able to access them directly. + + +Cache Culling +============= + +The cache may need culling occasionally to make space. This involves +discarding objects from the cache that have been used less recently than +anything else. Culling is based on the access time of data objects. Empty +directories are culled if not in use. + +Cache culling is done on the basis of the percentage of blocks and the +percentage of files available in the underlying filesystem. There are six +"limits": + + brun, frun + If the amount of free space and the number of available files in the cache + rises above both these limits, then culling is turned off. + + bcull, fcull + If the amount of available space or the number of available files in the + cache falls below either of these limits, then culling is started. + + bstop, fstop + If the amount of available space or the number of available files in the + cache falls below either of these limits, then no further allocation of + disk space or files is permitted until culling has raised things above + these limits again. + +These must be configured thusly:: + + 0 <= bstop < bcull < brun < 100 + 0 <= fstop < fcull < frun < 100 + +Note that these are percentages of available space and available files, and do +_not_ appear as 100 minus the percentage displayed by the "df" program. + +The userspace daemon scans the cache to build up a table of cullable objects. +These are then culled in least recently used order. A new scan of the cache is +started as soon as space is made in the table. Objects will be skipped if +their atimes have changed or if the kernel module says it is still using them. + + +Cache Structure +=============== + +The CacheFiles module will create two directories in the directory it was +given: + + * cache/ + * graveyard/ + +The active cache objects all reside in the first directory. 
The CacheFiles +kernel module moves any retired or culled objects that it can't simply unlink +to the graveyard from which the daemon will actually delete them. + +The daemon uses dnotify to monitor the graveyard directory, and will delete +anything that appears therein. + + +The module represents index objects as directories with the filename "I..." or +"J...". Note that the "cache/" directory is itself a special index. + +Data objects are represented as files if they have no children, or directories +if they do. Their filenames all begin "D..." or "E...". If represented as a +directory, data objects will have a file in the directory called "data" that +actually holds the data. + +Special objects are similar to data objects, except their filenames begin +"S..." or "T...". + + +If an object has children, then it will be represented as a directory. +Immediately in the representative directory are a collection of directories +named for hash values of the child object keys with an '@' prepended. Into +this directory, if possible, will be placed the representations of the child +objects:: + + /INDEX /INDEX /INDEX /DATA FILES + /=========/==========/=================================/================ + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry + cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry + + +If the key is so long that it exceeds NAME_MAX with the decorations added on to +it, then it will be cut into pieces, the first few of which will be used to +make a nest of directories, and the last one of which will be the objects +inside the last directory. 
The names of the intermediate directories will have +'+' prepended:: + + J1223/@23/+xy...z/+kl...m/Epqr + + +Note that keys are raw data, and not only may they exceed NAME_MAX in size, +they may also contain things like '/' and NUL characters, and so they may not +be suitable for turning directly into a filename. + +To handle this, CacheFiles will use a suitably printable filename directly and +"base-64" encode ones that aren't directly suitable. The two versions of +object filenames indicate the encoding: + + =============== =============== =============== + OBJECT TYPE PRINTABLE ENCODED + =============== =============== =============== + Index "I..." "J..." + Data "D..." "E..." + Special "S..." "T..." + =============== =============== =============== + +Intermediate directories are always "@" or "+" as appropriate. + + +Each object in the cache has an extended attribute label that holds the object +type ID (required to distinguish special objects) and the auxiliary data from +the netfs. The latter is used to detect stale objects in the cache and update +or retire them. + + +Note that CacheFiles will erase from the cache any file it doesn't recognise or +any file of an incorrect type (such as a FIFO file or a device file). + + +Security Model and SELinux +========================== + +CacheFiles is implemented to deal properly with the LSM security features of +the Linux kernel and the SELinux facility. + +One of the problems that CacheFiles faces is that it is generally acting on +behalf of a process, and running in that process's context, and that includes a +security context that is not appropriate for accessing the cache - either +because the files in the cache are inaccessible to that process, or because if +the process creates a file in the cache, that file may be inaccessible to other +processes. 
+ +The way CacheFiles works is to temporarily change the security context (fsuid, +fsgid and actor security label) that the process acts as - without changing the +security context of the process when it is the target of an operation performed by +some other process (so signalling and suchlike still work correctly). + + +When the CacheFiles module is asked to bind to its cache, it: + + (1) Finds the security label attached to the root cache directory and uses + that as the security label with which it will create files. By default, + this is:: + + cachefiles_var_t + + (2) Finds the security label of the process which issued the bind request + (presumed to be the cachefilesd daemon), which by default will be:: + + cachefilesd_t + + and asks LSM to supply a security ID as which it should act given the + daemon's label. By default, this will be:: + + cachefiles_kernel_t + + SELinux transitions the daemon's security ID to the module's security ID + based on a rule of this form in the policy:: + + type_transition kernel_t : process ; + + For instance:: + + type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t; + + +The module's security ID gives it permission to create, move and remove files +and directories in the cache, to find and access directories and files in the +cache, to set and access extended attributes on cache objects, and to read and +write files in the cache. + +The daemon's security ID gives it only a very restricted set of permissions: it +may scan directories, stat files and erase files and directories. It may +not read or write files in the cache, and so it is precluded from accessing the +data cached therein; nor is it permitted to create new files in the cache. + + +There are policy source files available in: + + http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2 + +and later versions. In that tarball, see the files:: + + cachefilesd.te + cachefilesd.fc + cachefilesd.if + +They are built and installed directly by the RPM. 
+ +If a non-RPM based system is being used, then copy the above files to their own +directory and run:: + + make -f /usr/share/selinux/devel/Makefile + semodule -i cachefilesd.pp + +You will need checkpolicy and selinux-policy-devel installed prior to the +build. + + +By default, the cache is located in /var/fscache, but if it is desirable that +it should be elsewhere, then either the above policy files must be altered, or +an auxiliary policy must be installed to label the alternate location of the +cache. + +For instructions on how to add an auxiliary policy to enable the cache to be +located elsewhere when SELinux is in enforcing mode, please see:: + + /usr/share/doc/cachefilesd-*/move-cache.txt + +When the cachefilesd rpm is installed; alternatively, the document can be found +in the sources. + + +A Note on Security +================== + +CacheFiles makes use of the split security in the task_struct. It allocates +its own task_security structure, and redirects current->cred to point to it +when it acts on behalf of another process, in that process's context. + +The reason it does this is that it calls vfs_mkdir() and suchlike rather than +bypassing security and calling inode ops directly. Therefore the VFS and LSM +may deny the CacheFiles access to the cache data because under some +circumstances the caching code is running in the security context of whatever +process issued the original syscall on the netfs. + +Furthermore, should CacheFiles create a file or directory, the security +parameters with which that object is created (UID, GID, security label) would be +derived from that process that issued the system call, thus potentially +preventing other processes from accessing the cache - including CacheFiles's +cache management daemon (cachefilesd). + +What is required is to temporarily override the security of the process that +issued the system call. 
We can't, however, just do an in-place change of the +security data as that affects the process as an object, not just as a subject. +This means it may lose signals or ptrace events for example, and affects what +the process looks like in /proc. + +So CacheFiles makes use of a logical split in the security between the +objective security (task->real_cred) and the subjective security (task->cred). +The objective security holds the intrinsic security properties of a process and +is never overridden. This is what appears in /proc, and is what is used when a +process is the target of an operation by some other process (SIGKILL for +example). + +The subjective security holds the active security properties of a process, and +may be overridden. This is not seen externally, and is used when a process +acts upon another object, for example SIGKILLing another process or opening a +file. + +LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request +for CacheFiles to run in a context of a specific security label, or to create +files and directories with another security label. + + +Statistical Information +======================= + +If FS-Cache is compiled with the following option enabled:: + + CONFIG_CACHEFILES_HISTOGRAM=y + +then it will gather certain statistics and display them through a proc file. + + /proc/fs/cachefiles/histogram + + :: + + cat /proc/fs/cachefiles/histogram + JIFS SECS LOOKUPS MKDIRS CREATES + ===== ===== ========= ========= ========= + + This shows the breakdown of the number of times each amount of time + between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. 
The + columns are as follows: + + ======= ======================================================= + COLUMN TIME MEASUREMENT + ======= ======================================================= + LOOKUPS Length of time to perform a lookup on the backing fs + MKDIRS Length of time to perform a mkdir on the backing fs + CREATES Length of time to perform a create on the backing fs + ======= ======================================================= + + Each row shows the number of events that took a particular range of times. + Each step is 1 jiffy in size. The JIFS column indicates the particular + jiffy range covered, and the SECS field the equivalent number of seconds. + + +Debugging +========= + +If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime +debugging enabled by adjusting the value in:: + + /sys/module/cachefiles/parameters/debug + +This is a bitmask of debugging streams to enable: + + ======= ======= =============================== ======================= + BIT VALUE STREAM POINT + ======= ======= =============================== ======================= + 0 1 General Function entry trace + 1 2 Function exit trace + 2 4 General + ======= ======= =============================== ======================= + +The appropriate set of values should be OR'd together and the result written to +the control file. For example:: + + echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug + +will turn on all function entry debugging. diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt deleted file mode 100644 index 28aefcbb1442..000000000000 --- a/Documentation/filesystems/caching/cachefiles.txt +++ /dev/null @@ -1,501 +0,0 @@ - =============================================== - CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM - =============================================== - -Contents: - - (*) Overview. - - (*) Requirements. - - (*) Configuration. - - (*) Starting the cache. 
- - (*) Things to avoid. - - (*) Cache culling. - - (*) Cache structure. - - (*) Security model and SELinux. - - (*) A note on security. - - (*) Statistical information. - - (*) Debugging. - - -======== -OVERVIEW -======== - -CacheFiles is a caching backend that's meant to use as a cache a directory on -an already mounted filesystem of a local type (such as Ext3). - -CacheFiles uses a userspace daemon to do some of the cache management - such as -reaping stale nodes and culling. This is called cachefilesd and lives in -/sbin. - -The filesystem and data integrity of the cache are only as good as those of the -filesystem providing the backing services. Note that CacheFiles does not -attempt to journal anything since the journalling interfaces of the various -filesystems are very specific in nature. - -CacheFiles creates a misc character device - "/dev/cachefiles" - that is used -to communication with the daemon. Only one thing may have this open at once, -and while it is open, a cache is at least partially in existence. The daemon -opens this and sends commands down it to control the cache. - -CacheFiles is currently limited to a single cache. - -CacheFiles attempts to maintain at least a certain percentage of free space on -the filesystem, shrinking the cache by culling the objects it contains to make -space if necessary - see the "Cache Culling" section. This means it can be -placed on the same medium as a live set of data, and will expand to make use of -spare space and automatically contract when the set of data requires more -space. - - -============ -REQUIREMENTS -============ - -The use of CacheFiles and its daemon requires the following features to be -available in the system and in the cache filesystem: - - - dnotify. - - - extended attributes (xattrs). - - - openat() and friends. - - - bmap() support on files in the filesystem (FIBMAP ioctl). - - - The use of bmap() to detect a partial page at the end of the file. 
- -It is strongly recommended that the "dir_index" option is enabled on Ext3 -filesystems being used as a cache. - - -============= -CONFIGURATION -============= - -The cache is configured by a script in /etc/cachefilesd.conf. These commands -set up cache ready for use. The following script commands are available: - - (*) brun % - (*) bcull % - (*) bstop % - (*) frun % - (*) fcull % - (*) fstop % - - Configure the culling limits. Optional. See the section on culling - The defaults are 7% (run), 5% (cull) and 1% (stop) respectively. - - The commands beginning with a 'b' are file space (block) limits, those - beginning with an 'f' are file count limits. - - (*) dir - - Specify the directory containing the root of the cache. Mandatory. - - (*) tag - - Specify a tag to FS-Cache to use in distinguishing multiple caches. - Optional. The default is "CacheFiles". - - (*) debug - - Specify a numeric bitmask to control debugging in the kernel module. - Optional. The default is zero (all off). The following values can be - OR'd into the mask to collect various information: - - 1 Turn on trace of function entry (_enter() macros) - 2 Turn on trace of function exit (_leave() macros) - 4 Turn on trace of internal debug points (_debug()) - - This mask can also be set through sysfs, eg: - - echo 5 >/sys/modules/cachefiles/parameters/debug - - -================== -STARTING THE CACHE -================== - -The cache is started by running the daemon. The daemon opens the cache device, -configures the cache and tells it to begin caching. At that point the cache -binds to fscache and the cache becomes live. - -The daemon is run as follows: - - /sbin/cachefilesd [-d]* [-s] [-n] [-f ] - -The flags are: - - (*) -d - - Increase the debugging level. This can be specified multiple times and - is cumulative with itself. - - (*) -s - - Send messages to stderr instead of syslog. - - (*) -n - - Don't daemonise and go into background. 
- - (*) -f - - Use an alternative configuration file rather than the default one. - - -=============== -THINGS TO AVOID -=============== - -Do not mount other things within the cache as this will cause problems. The -kernel module contains its own very cut-down path walking facility that ignores -mountpoints, but the daemon can't avoid them. - -Do not create, rename or unlink files and directories in the cache while the -cache is active, as this may cause the state to become uncertain. - -Renaming files in the cache might make objects appear to be other objects (the -filename is part of the lookup key). - -Do not change or remove the extended attributes attached to cache files by the -cache as this will cause the cache state management to get confused. - -Do not create files or directories in the cache, lest the cache get confused or -serve incorrect data. - -Do not chmod files in the cache. The module creates things with minimal -permissions to prevent random users being able to access them directly. - - -============= -CACHE CULLING -============= - -The cache may need culling occasionally to make space. This involves -discarding objects from the cache that have been used less recently than -anything else. Culling is based on the access time of data objects. Empty -directories are culled if not in use. - -Cache culling is done on the basis of the percentage of blocks and the -percentage of files available in the underlying filesystem. There are six -"limits": - - (*) brun - (*) frun - - If the amount of free space and the number of available files in the cache - rises above both these limits, then culling is turned off. - - (*) bcull - (*) fcull - - If the amount of available space or the number of available files in the - cache falls below either of these limits, then culling is started. 
- - (*) bstop - (*) fstop - - If the amount of available space or the number of available files in the - cache falls below either of these limits, then no further allocation of - disk space or files is permitted until culling has raised things above - these limits again. - -These must be configured thusly: - - 0 <= bstop < bcull < brun < 100 - 0 <= fstop < fcull < frun < 100 - -Note that these are percentages of available space and available files, and do -_not_ appear as 100 minus the percentage displayed by the "df" program. - -The userspace daemon scans the cache to build up a table of cullable objects. -These are then culled in least recently used order. A new scan of the cache is -started as soon as space is made in the table. Objects will be skipped if -their atimes have changed or if the kernel module says it is still using them. - - -=============== -CACHE STRUCTURE -=============== - -The CacheFiles module will create two directories in the directory it was -given: - - (*) cache/ - - (*) graveyard/ - -The active cache objects all reside in the first directory. The CacheFiles -kernel module moves any retired or culled objects that it can't simply unlink -to the graveyard from which the daemon will actually delete them. - -The daemon uses dnotify to monitor the graveyard directory, and will delete -anything that appears therein. - - -The module represents index objects as directories with the filename "I..." or -"J...". Note that the "cache/" directory is itself a special index. - -Data objects are represented as files if they have no children, or directories -if they do. Their filenames all begin "D..." or "E...". If represented as a -directory, data objects will have a file in the directory called "data" that -actually holds the data. - -Special objects are similar to data objects, except their filenames begin -"S..." or "T...". - - -If an object has children, then it will be represented as a directory. 
-Immediately in the representative directory are a collection of directories -named for hash values of the child object keys with an '@' prepended. Into -this directory, if possible, will be placed the representations of the child -objects: - - INDEX INDEX INDEX DATA FILES - ========= ========== ================================= ================ - cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 - cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry - cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry - cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry - - -If the key is so long that it exceeds NAME_MAX with the decorations added on to -it, then it will be cut into pieces, the first few of which will be used to -make a nest of directories, and the last one of which will be the objects -inside the last directory. The names of the intermediate directories will have -'+' prepended: - - J1223/@23/+xy...z/+kl...m/Epqr - - -Note that keys are raw data, and not only may they exceed NAME_MAX in size, -they may also contain things like '/' and NUL characters, and so they may not -be suitable for turning directly into a filename. - -To handle this, CacheFiles will use a suitably printable filename directly and -"base-64" encode ones that aren't directly suitable. The two versions of -object filenames indicate the encoding: - - OBJECT TYPE PRINTABLE ENCODED - =============== =============== =============== - Index "I..." "J..." - Data "D..." "E..." - Special "S..." "T..." - -Intermediate directories are always "@" or "+" as appropriate. - - -Each object in the cache has an extended attribute label that holds the object -type ID (required to distinguish special objects) and the auxiliary data from -the netfs. The latter is used to detect stale objects in the cache and update -or retire them. 
- - -Note that CacheFiles will erase from the cache any file it doesn't recognise or -any file of an incorrect type (such as a FIFO file or a device file). - - -========================== -SECURITY MODEL AND SELINUX -========================== - -CacheFiles is implemented to deal properly with the LSM security features of -the Linux kernel and the SELinux facility. - -One of the problems that CacheFiles faces is that it is generally acting on -behalf of a process, and running in that process's context, and that includes a -security context that is not appropriate for accessing the cache - either -because the files in the cache are inaccessible to that process, or because if -the process creates a file in the cache, that file may be inaccessible to other -processes. - -The way CacheFiles works is to temporarily change the security context (fsuid, -fsgid and actor security label) that the process acts as - without changing the -security context of the process when it the target of an operation performed by -some other process (so signalling and suchlike still work correctly). - - -When the CacheFiles module is asked to bind to its cache, it: - - (1) Finds the security label attached to the root cache directory and uses - that as the security label with which it will create files. By default, - this is: - - cachefiles_var_t - - (2) Finds the security label of the process which issued the bind request - (presumed to be the cachefilesd daemon), which by default will be: - - cachefilesd_t - - and asks LSM to supply a security ID as which it should act given the - daemon's label. By default, this will be: - - cachefiles_kernel_t - - SELinux transitions the daemon's security ID to the module's security ID - based on a rule of this form in the policy. 
- - type_transition kernel_t : process ; - - For instance: - - type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t; - - -The module's security ID gives it permission to create, move and remove files -and directories in the cache, to find and access directories and files in the -cache, to set and access extended attributes on cache objects, and to read and -write files in the cache. - -The daemon's security ID gives it only a very restricted set of permissions: it -may scan directories, stat files and erase files and directories. It may -not read or write files in the cache, and so it is precluded from accessing the -data cached therein; nor is it permitted to create new files in the cache. - - -There are policy source files available in: - - http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2 - -and later versions. In that tarball, see the files: - - cachefilesd.te - cachefilesd.fc - cachefilesd.if - -They are built and installed directly by the RPM. - -If a non-RPM based system is being used, then copy the above files to their own -directory and run: - - make -f /usr/share/selinux/devel/Makefile - semodule -i cachefilesd.pp - -You will need checkpolicy and selinux-policy-devel installed prior to the -build. - - -By default, the cache is located in /var/fscache, but if it is desirable that -it should be elsewhere, than either the above policy files must be altered, or -an auxiliary policy must be installed to label the alternate location of the -cache. - -For instructions on how to add an auxiliary policy to enable the cache to be -located elsewhere when SELinux is in enforcing mode, please see: - - /usr/share/doc/cachefilesd-*/move-cache.txt - -When the cachefilesd rpm is installed; alternatively, the document can be found -in the sources. - - -================== -A NOTE ON SECURITY -================== - -CacheFiles makes use of the split security in the task_struct. 
It allocates -its own task_security structure, and redirects current->cred to point to it -when it acts on behalf of another process, in that process's context. - -The reason it does this is that it calls vfs_mkdir() and suchlike rather than -bypassing security and calling inode ops directly. Therefore the VFS and LSM -may deny the CacheFiles access to the cache data because under some -circumstances the caching code is running in the security context of whatever -process issued the original syscall on the netfs. - -Furthermore, should CacheFiles create a file or directory, the security -parameters with that object is created (UID, GID, security label) would be -derived from that process that issued the system call, thus potentially -preventing other processes from accessing the cache - including CacheFiles's -cache management daemon (cachefilesd). - -What is required is to temporarily override the security of the process that -issued the system call. We can't, however, just do an in-place change of the -security data as that affects the process as an object, not just as a subject. -This means it may lose signals or ptrace events for example, and affects what -the process looks like in /proc. - -So CacheFiles makes use of a logical split in the security between the -objective security (task->real_cred) and the subjective security (task->cred). -The objective security holds the intrinsic security properties of a process and -is never overridden. This is what appears in /proc, and is what is used when a -process is the target of an operation by some other process (SIGKILL for -example). - -The subjective security holds the active security properties of a process, and -may be overridden. This is not seen externally, and is used whan a process -acts upon another object, for example SIGKILLing another process or opening a -file. 
- -LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request -for CacheFiles to run in a context of a specific security label, or to create -files and directories with another security label. - - -======================= -STATISTICAL INFORMATION -======================= - -If FS-Cache is compiled with the following option enabled: - - CONFIG_CACHEFILES_HISTOGRAM=y - -then it will gather certain statistics and display them through a proc file. - - (*) /proc/fs/cachefiles/histogram - - cat /proc/fs/cachefiles/histogram - JIFS SECS LOOKUPS MKDIRS CREATES - ===== ===== ========= ========= ========= - - This shows the breakdown of the number of times each amount of time - between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The - columns are as follows: - - COLUMN TIME MEASUREMENT - ======= ======================================================= - LOOKUPS Length of time to perform a lookup on the backing fs - MKDIRS Length of time to perform a mkdir on the backing fs - CREATES Length of time to perform a create on the backing fs - - Each row shows the number of events that took a particular range of times. - Each step is 1 jiffy in size. The JIFS column indicates the particular - jiffy range covered, and the SECS field the equivalent number of seconds. - - -========= -DEBUGGING -========= - -If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime -debugging enabled by adjusting the value in: - - /sys/module/cachefiles/parameters/debug - -This is a bitmask of debugging streams to enable: - - BIT VALUE STREAM POINT - ======= ======= =============================== ======================= - 0 1 General Function entry trace - 1 2 Function exit trace - 2 4 General - -The appropriate set of values should be OR'd together and the result written to -the control file. For example: - - echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug - -will turn on all function entry debugging. 
diff --git a/Documentation/filesystems/caching/index.rst b/Documentation/filesystems/caching/index.rst index 75492b7c8ea0..a2cf35f89e28 100644 --- a/Documentation/filesystems/caching/index.rst +++ b/Documentation/filesystems/caching/index.rst @@ -8,5 +8,6 @@ Filesystem Caching fscache object + cachefiles netfs-api operations diff --git a/MAINTAINERS b/MAINTAINERS index 82e4b0a0c921..a103117dec85 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3731,7 +3731,7 @@ CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS M: David Howells L: linux-cachefs@redhat.com (moderated for non-subscribers) S: Supported -F: Documentation/filesystems/caching/cachefiles.txt +F: Documentation/filesystems/caching/cachefiles.rst F: fs/cachefiles/ CADENCE MIPI-CSI2 BRIDGES diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig index ae559ed5b3b3..ff9ca55a9ae9 100644 --- a/fs/cachefiles/Kconfig +++ b/fs/cachefiles/Kconfig @@ -8,7 +8,7 @@ config CACHEFILES filesystems - primarily networking filesystems - thus allowing fast local disk to enhance the speed of slower devices. - See Documentation/filesystems/caching/cachefiles.txt for more + See Documentation/filesystems/caching/cachefiles.rst for more information. config CACHEFILES_DEBUG @@ -36,5 +36,5 @@ config CACHEFILES_HISTOGRAM bouncing between CPUs. On the other hand, the histogram may be useful for debugging purposes. Saying 'N' here is recommended. - See Documentation/filesystems/caching/cachefiles.txt for more + See Documentation/filesystems/caching/cachefiles.rst for more information. -- cgit v1.2.3 From 0e822145b564204cd5e9dd67a7fd37d4a7b8253b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:58 +0200 Subject: docs: filesystems: caching/backend-api.txt: convert it to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/caching/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5d0a61abaa87bfe913b9e2f321e74ef7af0f3dfc.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/caching/backend-api.rst | 727 ++++++++++++++++++++++ Documentation/filesystems/caching/backend-api.txt | 726 --------------------- Documentation/filesystems/caching/fscache.rst | 2 +- Documentation/filesystems/caching/index.rst | 1 + fs/fscache/cache.c | 8 +- fs/fscache/object.c | 2 +- include/linux/fscache-cache.h | 4 +- 7 files changed, 736 insertions(+), 734 deletions(-) create mode 100644 Documentation/filesystems/caching/backend-api.rst delete mode 100644 Documentation/filesystems/caching/backend-api.txt diff --git a/Documentation/filesystems/caching/backend-api.rst b/Documentation/filesystems/caching/backend-api.rst new file mode 100644 index 000000000000..19fbf6b9aa36 --- /dev/null +++ b/Documentation/filesystems/caching/backend-api.rst @@ -0,0 +1,727 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +FS-Cache Cache backend API +========================== + +The FS-Cache system provides an API by which actual caches can be supplied to +FS-Cache for it to then serve out to network filesystems and other interested +parties. + +This API is declared in . + + +Initialising and Registering a Cache +==================================== + +To start off, a cache definition must be initialised and registered for each +cache the backend wants to make available. For instance, CacheFS does this in +the fill_super() operation on mounting. 
+ +The cache definition (struct fscache_cache) should be initialised by calling:: + + void fscache_init_cache(struct fscache_cache *cache, + struct fscache_cache_ops *ops, + const char *idfmt, + ...); + +Where: + + * "cache" is a pointer to the cache definition; + + * "ops" is a pointer to the table of operations that the backend supports on + this cache; and + + * "idfmt" is a format and printf-style arguments for constructing a label + for the cache. + + +The cache should then be registered with FS-Cache by passing a pointer to the +previously initialised cache definition to:: + + int fscache_add_cache(struct fscache_cache *cache, + struct fscache_object *fsdef, + const char *tagname); + +Two extra arguments should also be supplied: + + * "fsdef" which should point to the object representation for the FS-Cache + master index in this cache. Netfs primary index entries will be created + here. FS-Cache keeps the caller's reference to the index object if + successful and will release it upon withdrawal of the cache. + + * "tagname" which, if given, should be a text string naming this cache. If + this is NULL, the identifier will be used instead. For CacheFS, the + identifier is set to name the underlying block device and the tag can be + supplied by mount. + +This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag +is already in use. 0 will be returned on success. + + +Unregistering a Cache +===================== + +A cache can be withdrawn from the system by calling this function with a +pointer to the cache definition:: + + void fscache_withdraw_cache(struct fscache_cache *cache); + +In CacheFS's case, this is called by put_super(). + + +Security +======== + +The cache methods are executed one of two contexts: + + (1) that of the userspace process that issued the netfs operation that caused + the cache method to be invoked, or + + (2) that of one of the processes in the FS-Cache thread pool. 
+ +In either case, this may not be an appropriate context in which to access the +cache. + +The calling process's fsuid, fsgid and SELinux security identities may need to +be masqueraded for the duration of the cache driver's access to the cache. +This is left to the cache to handle; FS-Cache makes no effort in this regard. + + +Control and Statistics Presentation +=================================== + +The cache may present data to the outside world through FS-Cache's interfaces +in sysfs and procfs - the former for control and the latter for statistics. + +A sysfs directory called /sys/fs/fscache// is created if CONFIG_SYSFS +is enabled. This is accessible through the kobject struct fscache_cache::kobj +and is for use by the cache as it sees fit. + + +Relevant Data Structures +======================== + + * Index/Data file FS-Cache representation cookie:: + + struct fscache_cookie { + struct fscache_object_def *def; + struct fscache_netfs *netfs; + void *netfs_data; + ... + }; + + The fields that might be of use to the backend describe the object + definition, the netfs definition and the netfs's data for this cookie. + The object definition contain functions supplied by the netfs for loading + and matching index entries; these are required to provide some of the + cache operations. + + + * In-cache object representation:: + + struct fscache_object { + int debug_id; + enum { + FSCACHE_OBJECT_RECYCLING, + ... + } state; + spinlock_t lock + struct fscache_cache *cache; + struct fscache_cookie *cookie; + ... + }; + + Structures of this type should be allocated by the cache backend and + passed to FS-Cache when requested by the appropriate cache operation. In + the case of CacheFS, they're embedded in CacheFS's internal object + structures. + + The debug_id is a simple integer that can be used in debugging messages + that refer to a particular object. In such a case it should be printed + using "OBJ%x" to be consistent with FS-Cache. 
+ + Each object contains a pointer to the cookie that represents the object it + is backing. An object should retired when put_object() is called if it is + in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be + initialised by calling fscache_object_init(object). + + + * FS-Cache operation record:: + + struct fscache_operation { + atomic_t usage; + struct fscache_object *object; + unsigned long flags; + #define FSCACHE_OP_EXCLUSIVE + void (*processor)(struct fscache_operation *op); + void (*release)(struct fscache_operation *op); + ... + }; + + FS-Cache has a pool of threads that it uses to give CPU time to the + various asynchronous operations that need to be done as part of driving + the cache. These are represented by the above structure. The processor + method is called to give the op CPU time, and the release method to get + rid of it when its usage count reaches 0. + + An operation can be made exclusive upon an object by setting the + appropriate flag before enqueuing it with fscache_enqueue_operation(). If + an operation needs more processing time, it should be enqueued again. + + + * FS-Cache retrieval operation record:: + + struct fscache_retrieval { + struct fscache_operation op; + struct address_space *mapping; + struct list_head *to_do; + ... + }; + + A structure of this type is allocated by FS-Cache to record retrieval and + allocation requests made by the netfs. This struct is then passed to the + backend to do the operation. The backend may get extra refs to it by + calling fscache_get_retrieval() and refs may be discarded by calling + fscache_put_retrieval(). + + A retrieval operation can be used by the backend to do retrieval work. To + do this, the retrieval->op.processor method pointer should be set + appropriately by the backend and fscache_enqueue_retrieval() called to + submit it to the thread pool. CacheFiles, for example, uses this to queue + page examination when it detects PG_lock being cleared. 
+ + The to_do field is an empty list available for the cache backend to use as + it sees fit. + + + * FS-Cache storage operation record:: + + struct fscache_storage { + struct fscache_operation op; + pgoff_t store_limit; + ... + }; + + A structure of this type is allocated by FS-Cache to record outstanding + writes to be made. FS-Cache itself enqueues this operation and invokes + the write_page() method on the object at appropriate times to effect + storage. + + +Cache Operations +================ + +The cache backend provides FS-Cache with a table of operations that can be +performed on the denizens of the cache. These are held in a structure of type: + + :: + + struct fscache_cache_ops + + * Name of cache provider [mandatory]:: + + const char *name + + This isn't strictly an operation, but should be pointed at a string naming + the backend. + + + * Allocate a new object [mandatory]:: + + struct fscache_object *(*alloc_object)(struct fscache_cache *cache, + struct fscache_cookie *cookie) + + This method is used to allocate a cache object representation to back a + cookie in a particular cache. fscache_object_init() should be called on + the object to initialise it prior to returning. + + This function may also be used to parse the index key to be used for + multiple lookup calls to turn it into a more convenient form. FS-Cache + will call the lookup_complete() method to allow the cache to release the + form once lookup is complete or aborted. + + + * Look up and create object [mandatory]:: + + void (*lookup_object)(struct fscache_object *object) + + This method is used to look up an object, given that the object is already + allocated and attached to the cookie. This should instantiate that object + in the cache if it can. + + The method should call fscache_object_lookup_negative() as soon as + possible if it determines the object doesn't exist in the cache. 
If the + object is found to exist and the netfs indicates that it is valid then + fscache_obtained_object() should be called once the object is in a + position to have data stored in it. Similarly, fscache_obtained_object() + should also be called once a non-present object has been created. + + If a lookup error occurs, fscache_object_lookup_error() should be called + to abort the lookup of that object. + + + * Release lookup data [mandatory]:: + + void (*lookup_complete)(struct fscache_object *object) + + This method is called to ask the cache to release any resources it was + using to perform a lookup. + + + * Increment object refcount [mandatory]:: + + struct fscache_object *(*grab_object)(struct fscache_object *object) + + This method is called to increment the reference count on an object. It + may fail (for instance if the cache is being withdrawn) by returning NULL. + It should return the object pointer if successful. + + + * Lock/Unlock object [mandatory]:: + + void (*lock_object)(struct fscache_object *object) + void (*unlock_object)(struct fscache_object *object) + + These methods are used to exclusively lock an object. It must be possible + to schedule with the lock held, so a spinlock isn't sufficient. + + + * Pin/Unpin object [optional]:: + + int (*pin_object)(struct fscache_object *object) + void (*unpin_object)(struct fscache_object *object) + + These methods are used to pin an object into the cache. Once pinned an + object cannot be reclaimed to make space. Return -ENOSPC if there's not + enough space in the cache to permit this. + + + * Check coherency state of an object [mandatory]:: + + int (*check_consistency)(struct fscache_object *object) + + This method is called to have the cache check the saved auxiliary data of + the object against the netfs's idea of the state. 0 should be returned + if they're consistent and -ESTALE otherwise. -ENOMEM and -ERESTARTSYS + may also be returned. 
+ + * Update object [mandatory]:: + + int (*update_object)(struct fscache_object *object) + + This is called to update the index entry for the specified object. The + new information should be in object->cookie->netfs_data. This can be + obtained by calling object->cookie->def->get_aux()/get_attr(). + + + * Invalidate data object [mandatory]:: + + int (*invalidate_object)(struct fscache_operation *op) + + This is called to invalidate a data object (as pointed to by op->object). + All the data stored for this object should be discarded and an + attr_changed operation should be performed. The caller will follow up + with an object update operation. + + fscache_op_complete() must be called on op before returning. + + + * Discard object [mandatory]:: + + void (*drop_object)(struct fscache_object *object) + + This method is called to indicate that an object has been unbound from its + cookie, and that the cache should release the object's resources and + retire it if it's in state FSCACHE_OBJECT_RECYCLING. + + This method should not attempt to release any references held by the + caller. The caller will invoke the put_object() method as appropriate. + + + * Release object reference [mandatory]:: + + void (*put_object)(struct fscache_object *object) + + This method is used to discard a reference to an object. The object may + be freed when all the references to it are released. + + + * Synchronise a cache [mandatory]:: + + void (*sync)(struct fscache_cache *cache) + + This is called to ask the backend to synchronise a cache with its backing + device. + + + * Dissociate a cache [mandatory]:: + + void (*dissociate_pages)(struct fscache_cache *cache) + + This is called to ask a cache to perform any page dissociations as part of + cache withdrawal. 
+ + + * Notification that the attributes on a netfs file changed [mandatory]:: + + int (*attr_changed)(struct fscache_object *object); + + This is called to indicate to the cache that certain attributes on a netfs + file have changed (for example the maximum size a file may reach). The + cache can read these from the netfs by calling the cookie's get_attr() + method. + + The cache may use the file size information to reserve space on the cache. + It should also call fscache_set_store_limit() to indicate to FS-Cache the + highest byte it's willing to store for an object. + + This method may return -ve if an error occurred or the cache object cannot + be expanded. In such a case, the object will be withdrawn from service. + + This operation is run asynchronously from FS-Cache's thread pool, and + storage and retrieval operations from the netfs are excluded during the + execution of this operation. + + + * Reserve cache space for an object's data [optional]:: + + int (*reserve_space)(struct fscache_object *object, loff_t size); + + This is called to request that cache space be reserved to hold the data + for an object and the metadata used to track it. Zero size should be + taken as request to cancel a reservation. + + This should return 0 if successful, -ENOSPC if there isn't enough space + available, or -ENOMEM or -EIO on other errors. + + The reservation may exceed the current size of the object, thus permitting + future expansion. If the amount of space consumed by an object would + exceed the reservation, it's permitted to refuse requests to allocate + pages, but not required. An object may be pruned down to its reservation + size if larger than that already. + + + * Request page be read from cache [mandatory]:: + + int (*read_or_alloc_page)(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp) + + This is called to attempt to read a netfs page from the cache, or to + reserve a backing block if not. 
FS-Cache will have done as much checking + as it can before calling, but most of the work belongs to the backend. + + If there's no page in the cache, then -ENODATA should be returned if the + backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it + didn't. + + If there is suitable data in the cache, then a read operation should be + queued and 0 returned. When the read finishes, fscache_end_io() should be + called. + + The fscache_mark_pages_cached() should be called for the page if any cache + metadata is retained. This will indicate to the netfs that the page needs + explicit uncaching. This operation takes a pagevec, thus allowing several + pages to be marked at once. + + The retrieval record pointed to by op should be retained for each page + queued and released when I/O on the page has been formally ended. + fscache_get/put_retrieval() are available for this purpose. + + The retrieval record may be used to get CPU time via the FS-Cache thread + pool. If this is desired, the op->op.processor should be set to point to + the appropriate processing routine, and fscache_enqueue_retrieval() should + be called at an appropriate point to request CPU time. For instance, the + retrieval routine could be enqueued upon the completion of a disk read. + The to_do field in the retrieval record is provided to aid in this. + + If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS + returned if possible or fscache_end_io() called with a suitable error + code. + + fscache_put_retrieval() should be called after a page or pages are dealt + with. This will complete the operation when all pages are dealt with. + + + * Request pages be read from cache [mandatory]:: + + int (*read_or_alloc_pages)(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp) + + This is like the read_or_alloc_page() method, except it is handed a list + of pages instead of one page. 
Any pages on which a read operation is + started must be added to the page cache for the specified mapping and also + to the LRU. Such pages must also be removed from the pages list and + ``*nr_pages`` decremented per page. + + If there was an error such as -ENOMEM, then that should be returned; else + if one or more pages couldn't be read or allocated, then -ENOBUFS should + be returned; else if one or more pages couldn't be read, then -ENODATA + should be returned. If all the pages are dispatched then 0 should be + returned. + + + * Request page be allocated in the cache [mandatory]:: + + int (*allocate_page)(struct fscache_retrieval *op, + struct page *page, + gfp_t gfp) + + This is like the read_or_alloc_page() method, except that it shouldn't + read from the cache, even if there's data there that could be retrieved. + It should, however, set up any internal metadata required such that + the write_page() method can write to the cache. + + If there's no backing block available, then -ENOBUFS should be returned + (or -ENOMEM if there were other problems). If a block is successfully + allocated, then the netfs page should be marked and 0 returned. + + + * Request pages be allocated in the cache [mandatory]:: + + int (*allocate_pages)(struct fscache_retrieval *op, + struct list_head *pages, + unsigned *nr_pages, + gfp_t gfp) + + This is an multiple page version of the allocate_page() method. pages and + nr_pages should be treated as for the read_or_alloc_pages() method. + + + * Request page be written to cache [mandatory]:: + + int (*write_page)(struct fscache_storage *op, + struct page *page); + + This is called to write from a page on which there was a previously + successful read_or_alloc_page() call or similar. FS-Cache filters out + pages that don't have mappings. + + This method is called asynchronously from the FS-Cache thread pool. It is + not required to actually store anything, provided -ENODATA is then + returned to the next read of this page. 
+ + If an error occurred, then a negative error code should be returned, + otherwise zero should be returned. FS-Cache will take appropriate action + in response to an error, such as withdrawing this object. + + If this method returns success then FS-Cache will inform the netfs + appropriately. + + + * Discard retained per-page metadata [mandatory]:: + + void (*uncache_page)(struct fscache_object *object, struct page *page) + + This is called when a netfs page is being evicted from the pagecache. The + cache backend should tear down any internal representation or tracking it + maintains for this page. + + +FS-Cache Utilities +================== + +FS-Cache provides some utilities that a cache backend may make use of: + + * Note occurrence of an I/O error in a cache:: + + void fscache_io_error(struct fscache_cache *cache) + + This tells FS-Cache that an I/O error occurred in the cache. After this + has been called, only resource dissociation operations (object and page + release) will be passed from the netfs to the cache backend for the + specified cache. + + This does not actually withdraw the cache. That must be done separately. + + + * Invoke the retrieval I/O completion function:: + + void fscache_end_io(struct fscache_retrieval *op, struct page *page, + int error); + + This is called to note the end of an attempt to retrieve a page. The + error value should be 0 if successful and an error otherwise. + + + * Record that one or more pages being retrieved or allocated have been dealt + with:: + + void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages); + + This is called to record the fact that one or more pages have been dealt + with and are no longer the concern of this operation. When the number of + pages remaining in the operation reaches 0, the operation will be + completed. + + + * Record operation completion:: + + void fscache_op_complete(struct fscache_operation *op); + + This is called to record the completion of an operation. 
This deducts + this operation from the parent object's run state, potentially permitting + one or more pending operations to start running. + + + * Set highest store limit:: + + void fscache_set_store_limit(struct fscache_object *object, + loff_t i_size); + + This sets the limit FS-Cache imposes on the highest byte it's willing to + try and store for a netfs. Any page over this limit is automatically + rejected by fscache_read_alloc_page() and co with -ENOBUFS. + + + * Mark pages as being cached:: + + void fscache_mark_pages_cached(struct fscache_retrieval *op, + struct pagevec *pagevec); + + This marks a set of pages as being cached. After this has been called, + the netfs must call fscache_uncache_page() to unmark the pages. + + + * Perform coherency check on an object:: + + enum fscache_checkaux fscache_check_aux(struct fscache_object *object, + const void *data, + uint16_t datalen); + + This asks the netfs to perform a coherency check on an object that has + just been looked up. The cookie attached to the object will determine the + netfs to use. data and datalen should specify where the auxiliary data + retrieved from the cache can be found. + + One of three values will be returned: + + FSCACHE_CHECKAUX_OKAY + The coherency data indicates the object is valid as is. + + FSCACHE_CHECKAUX_NEEDS_UPDATE + The coherency data needs updating, but otherwise the object is + valid. + + FSCACHE_CHECKAUX_OBSOLETE + The coherency data indicates that the object is obsolete and should + be discarded. + + + * Initialise a freshly allocated object:: + + void fscache_object_init(struct fscache_object *object); + + This initialises all the fields in an object representation. + + + * Indicate the destruction of an object:: + + void fscache_object_destroyed(struct fscache_cache *cache); + + This must be called to inform FS-Cache that an object that belonged to a + cache has been destroyed and deallocated. 
This will allow continuation + of the cache withdrawal process when it is stopped pending destruction of + all the objects. + + + * Indicate negative lookup on an object:: + + void fscache_object_lookup_negative(struct fscache_object *object); + + This is called to indicate to FS-Cache that a lookup process for an object + found a negative result. + + This changes the state of an object to permit reads pending on lookup + completion to go off and start fetching data from the netfs server as it's + known at this point that there can't be any data in the cache. + + This may be called multiple times on an object. Only the first call is + significant - all subsequent calls are ignored. + + + * Indicate an object has been obtained:: + + void fscache_obtained_object(struct fscache_object *object); + + This is called to indicate to FS-Cache that a lookup process for an object + produced a positive result, or that an object was created. This should + only be called once for any particular object. + + This changes the state of an object to indicate: + + (1) if no call to fscache_object_lookup_negative() has been made on + this object, that there may be data available, and that reads can + now go and look for it; and + + (2) that writes may now proceed against this object. + + + * Indicate that object lookup failed:: + + void fscache_object_lookup_error(struct fscache_object *object); + + This marks an object as having encountered a fatal error (usually EIO) + and causes it to move into a state whereby it will be withdrawn as soon + as possible. + + + * Indicate that a stale object was found and discarded:: + + void fscache_object_retrying_stale(struct fscache_object *object); + + This is called to indicate that the lookup procedure found an object in + the cache that the netfs decided was stale. The object has been + discarded from the cache and the lookup will be performed again. 
+ + + * Indicate that the caching backend killed an object:: + + void fscache_object_mark_killed(struct fscache_object *object, + enum fscache_why_object_killed why); + + This is called to indicate that the cache backend preemptively killed an + object. The why parameter should be set to indicate the reason: + + FSCACHE_OBJECT_IS_STALE + - the object was stale and needs discarding. + + FSCACHE_OBJECT_NO_SPACE + - there was insufficient cache space + + FSCACHE_OBJECT_WAS_RETIRED + - the object was retired when relinquished. + + FSCACHE_OBJECT_WAS_CULLED + - the object was culled to make space. + + + * Get and release references on a retrieval record:: + + void fscache_get_retrieval(struct fscache_retrieval *op); + void fscache_put_retrieval(struct fscache_retrieval *op); + + These two functions are used to retain a retrieval record while doing + asynchronous data retrieval and block allocation. + + + * Enqueue a retrieval record for processing:: + + void fscache_enqueue_retrieval(struct fscache_retrieval *op); + + This enqueues a retrieval record for processing by the FS-Cache thread + pool. One of the threads in the pool will invoke the retrieval record's + op->op.processor callback function. This function may be called from + within the callback function. + + + * List of object state names:: + + const char *fscache_object_states[]; + + For debugging purposes, this may be used to turn the state that an object + is in into a text string for display purposes. diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt deleted file mode 100644 index c418280c915f..000000000000 --- a/Documentation/filesystems/caching/backend-api.txt +++ /dev/null @@ -1,726 +0,0 @@ - ========================== - FS-CACHE CACHE BACKEND API - ========================== - -The FS-Cache system provides an API by which actual caches can be supplied to -FS-Cache for it to then serve out to network filesystems and other interested -parties. 
- -This API is declared in . - - -==================================== -INITIALISING AND REGISTERING A CACHE -==================================== - -To start off, a cache definition must be initialised and registered for each -cache the backend wants to make available. For instance, CacheFS does this in -the fill_super() operation on mounting. - -The cache definition (struct fscache_cache) should be initialised by calling: - - void fscache_init_cache(struct fscache_cache *cache, - struct fscache_cache_ops *ops, - const char *idfmt, - ...); - -Where: - - (*) "cache" is a pointer to the cache definition; - - (*) "ops" is a pointer to the table of operations that the backend supports on - this cache; and - - (*) "idfmt" is a format and printf-style arguments for constructing a label - for the cache. - - -The cache should then be registered with FS-Cache by passing a pointer to the -previously initialised cache definition to: - - int fscache_add_cache(struct fscache_cache *cache, - struct fscache_object *fsdef, - const char *tagname); - -Two extra arguments should also be supplied: - - (*) "fsdef" which should point to the object representation for the FS-Cache - master index in this cache. Netfs primary index entries will be created - here. FS-Cache keeps the caller's reference to the index object if - successful and will release it upon withdrawal of the cache. - - (*) "tagname" which, if given, should be a text string naming this cache. If - this is NULL, the identifier will be used instead. For CacheFS, the - identifier is set to name the underlying block device and the tag can be - supplied by mount. - -This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag -is already in use. 0 will be returned on success. 
- - -===================== -UNREGISTERING A CACHE -===================== - -A cache can be withdrawn from the system by calling this function with a -pointer to the cache definition: - - void fscache_withdraw_cache(struct fscache_cache *cache); - -In CacheFS's case, this is called by put_super(). - - -======== -SECURITY -======== - -The cache methods are executed one of two contexts: - - (1) that of the userspace process that issued the netfs operation that caused - the cache method to be invoked, or - - (2) that of one of the processes in the FS-Cache thread pool. - -In either case, this may not be an appropriate context in which to access the -cache. - -The calling process's fsuid, fsgid and SELinux security identities may need to -be masqueraded for the duration of the cache driver's access to the cache. -This is left to the cache to handle; FS-Cache makes no effort in this regard. - - -=================================== -CONTROL AND STATISTICS PRESENTATION -=================================== - -The cache may present data to the outside world through FS-Cache's interfaces -in sysfs and procfs - the former for control and the latter for statistics. - -A sysfs directory called /sys/fs/fscache// is created if CONFIG_SYSFS -is enabled. This is accessible through the kobject struct fscache_cache::kobj -and is for use by the cache as it sees fit. - - -======================== -RELEVANT DATA STRUCTURES -======================== - - (*) Index/Data file FS-Cache representation cookie: - - struct fscache_cookie { - struct fscache_object_def *def; - struct fscache_netfs *netfs; - void *netfs_data; - ... - }; - - The fields that might be of use to the backend describe the object - definition, the netfs definition and the netfs's data for this cookie. - The object definition contain functions supplied by the netfs for loading - and matching index entries; these are required to provide some of the - cache operations. 
- - - (*) In-cache object representation: - - struct fscache_object { - int debug_id; - enum { - FSCACHE_OBJECT_RECYCLING, - ... - } state; - spinlock_t lock - struct fscache_cache *cache; - struct fscache_cookie *cookie; - ... - }; - - Structures of this type should be allocated by the cache backend and - passed to FS-Cache when requested by the appropriate cache operation. In - the case of CacheFS, they're embedded in CacheFS's internal object - structures. - - The debug_id is a simple integer that can be used in debugging messages - that refer to a particular object. In such a case it should be printed - using "OBJ%x" to be consistent with FS-Cache. - - Each object contains a pointer to the cookie that represents the object it - is backing. An object should retired when put_object() is called if it is - in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be - initialised by calling fscache_object_init(object). - - - (*) FS-Cache operation record: - - struct fscache_operation { - atomic_t usage; - struct fscache_object *object; - unsigned long flags; - #define FSCACHE_OP_EXCLUSIVE - void (*processor)(struct fscache_operation *op); - void (*release)(struct fscache_operation *op); - ... - }; - - FS-Cache has a pool of threads that it uses to give CPU time to the - various asynchronous operations that need to be done as part of driving - the cache. These are represented by the above structure. The processor - method is called to give the op CPU time, and the release method to get - rid of it when its usage count reaches 0. - - An operation can be made exclusive upon an object by setting the - appropriate flag before enqueuing it with fscache_enqueue_operation(). If - an operation needs more processing time, it should be enqueued again. - - - (*) FS-Cache retrieval operation record: - - struct fscache_retrieval { - struct fscache_operation op; - struct address_space *mapping; - struct list_head *to_do; - ... 
- }; - - A structure of this type is allocated by FS-Cache to record retrieval and - allocation requests made by the netfs. This struct is then passed to the - backend to do the operation. The backend may get extra refs to it by - calling fscache_get_retrieval() and refs may be discarded by calling - fscache_put_retrieval(). - - A retrieval operation can be used by the backend to do retrieval work. To - do this, the retrieval->op.processor method pointer should be set - appropriately by the backend and fscache_enqueue_retrieval() called to - submit it to the thread pool. CacheFiles, for example, uses this to queue - page examination when it detects PG_lock being cleared. - - The to_do field is an empty list available for the cache backend to use as - it sees fit. - - - (*) FS-Cache storage operation record: - - struct fscache_storage { - struct fscache_operation op; - pgoff_t store_limit; - ... - }; - - A structure of this type is allocated by FS-Cache to record outstanding - writes to be made. FS-Cache itself enqueues this operation and invokes - the write_page() method on the object at appropriate times to effect - storage. - - -================ -CACHE OPERATIONS -================ - -The cache backend provides FS-Cache with a table of operations that can be -performed on the denizens of the cache. These are held in a structure of type: - - struct fscache_cache_ops - - (*) Name of cache provider [mandatory]: - - const char *name - - This isn't strictly an operation, but should be pointed at a string naming - the backend. - - - (*) Allocate a new object [mandatory]: - - struct fscache_object *(*alloc_object)(struct fscache_cache *cache, - struct fscache_cookie *cookie) - - This method is used to allocate a cache object representation to back a - cookie in a particular cache. fscache_object_init() should be called on - the object to initialise it prior to returning. 
- - This function may also be used to parse the index key to be used for - multiple lookup calls to turn it into a more convenient form. FS-Cache - will call the lookup_complete() method to allow the cache to release the - form once lookup is complete or aborted. - - - (*) Look up and create object [mandatory]: - - void (*lookup_object)(struct fscache_object *object) - - This method is used to look up an object, given that the object is already - allocated and attached to the cookie. This should instantiate that object - in the cache if it can. - - The method should call fscache_object_lookup_negative() as soon as - possible if it determines the object doesn't exist in the cache. If the - object is found to exist and the netfs indicates that it is valid then - fscache_obtained_object() should be called once the object is in a - position to have data stored in it. Similarly, fscache_obtained_object() - should also be called once a non-present object has been created. - - If a lookup error occurs, fscache_object_lookup_error() should be called - to abort the lookup of that object. - - - (*) Release lookup data [mandatory]: - - void (*lookup_complete)(struct fscache_object *object) - - This method is called to ask the cache to release any resources it was - using to perform a lookup. - - - (*) Increment object refcount [mandatory]: - - struct fscache_object *(*grab_object)(struct fscache_object *object) - - This method is called to increment the reference count on an object. It - may fail (for instance if the cache is being withdrawn) by returning NULL. - It should return the object pointer if successful. - - - (*) Lock/Unlock object [mandatory]: - - void (*lock_object)(struct fscache_object *object) - void (*unlock_object)(struct fscache_object *object) - - These methods are used to exclusively lock an object. It must be possible - to schedule with the lock held, so a spinlock isn't sufficient. 
- - - (*) Pin/Unpin object [optional]: - - int (*pin_object)(struct fscache_object *object) - void (*unpin_object)(struct fscache_object *object) - - These methods are used to pin an object into the cache. Once pinned an - object cannot be reclaimed to make space. Return -ENOSPC if there's not - enough space in the cache to permit this. - - - (*) Check coherency state of an object [mandatory]: - - int (*check_consistency)(struct fscache_object *object) - - This method is called to have the cache check the saved auxiliary data of - the object against the netfs's idea of the state. 0 should be returned - if they're consistent and -ESTALE otherwise. -ENOMEM and -ERESTARTSYS - may also be returned. - - (*) Update object [mandatory]: - - int (*update_object)(struct fscache_object *object) - - This is called to update the index entry for the specified object. The - new information should be in object->cookie->netfs_data. This can be - obtained by calling object->cookie->def->get_aux()/get_attr(). - - - (*) Invalidate data object [mandatory]: - - int (*invalidate_object)(struct fscache_operation *op) - - This is called to invalidate a data object (as pointed to by op->object). - All the data stored for this object should be discarded and an - attr_changed operation should be performed. The caller will follow up - with an object update operation. - - fscache_op_complete() must be called on op before returning. - - - (*) Discard object [mandatory]: - - void (*drop_object)(struct fscache_object *object) - - This method is called to indicate that an object has been unbound from its - cookie, and that the cache should release the object's resources and - retire it if it's in state FSCACHE_OBJECT_RECYCLING. - - This method should not attempt to release any references held by the - caller. The caller will invoke the put_object() method as appropriate. 
- - - (*) Release object reference [mandatory]: - - void (*put_object)(struct fscache_object *object) - - This method is used to discard a reference to an object. The object may - be freed when all the references to it are released. - - - (*) Synchronise a cache [mandatory]: - - void (*sync)(struct fscache_cache *cache) - - This is called to ask the backend to synchronise a cache with its backing - device. - - - (*) Dissociate a cache [mandatory]: - - void (*dissociate_pages)(struct fscache_cache *cache) - - This is called to ask a cache to perform any page dissociations as part of - cache withdrawal. - - - (*) Notification that the attributes on a netfs file changed [mandatory]: - - int (*attr_changed)(struct fscache_object *object); - - This is called to indicate to the cache that certain attributes on a netfs - file have changed (for example the maximum size a file may reach). The - cache can read these from the netfs by calling the cookie's get_attr() - method. - - The cache may use the file size information to reserve space on the cache. - It should also call fscache_set_store_limit() to indicate to FS-Cache the - highest byte it's willing to store for an object. - - This method may return -ve if an error occurred or the cache object cannot - be expanded. In such a case, the object will be withdrawn from service. - - This operation is run asynchronously from FS-Cache's thread pool, and - storage and retrieval operations from the netfs are excluded during the - execution of this operation. - - - (*) Reserve cache space for an object's data [optional]: - - int (*reserve_space)(struct fscache_object *object, loff_t size); - - This is called to request that cache space be reserved to hold the data - for an object and the metadata used to track it. Zero size should be - taken as request to cancel a reservation. - - This should return 0 if successful, -ENOSPC if there isn't enough space - available, or -ENOMEM or -EIO on other errors. 
- - The reservation may exceed the current size of the object, thus permitting - future expansion. If the amount of space consumed by an object would - exceed the reservation, it's permitted to refuse requests to allocate - pages, but not required. An object may be pruned down to its reservation - size if larger than that already. - - - (*) Request page be read from cache [mandatory]: - - int (*read_or_alloc_page)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) - - This is called to attempt to read a netfs page from the cache, or to - reserve a backing block if not. FS-Cache will have done as much checking - as it can before calling, but most of the work belongs to the backend. - - If there's no page in the cache, then -ENODATA should be returned if the - backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it - didn't. - - If there is suitable data in the cache, then a read operation should be - queued and 0 returned. When the read finishes, fscache_end_io() should be - called. - - The fscache_mark_pages_cached() should be called for the page if any cache - metadata is retained. This will indicate to the netfs that the page needs - explicit uncaching. This operation takes a pagevec, thus allowing several - pages to be marked at once. - - The retrieval record pointed to by op should be retained for each page - queued and released when I/O on the page has been formally ended. - fscache_get/put_retrieval() are available for this purpose. - - The retrieval record may be used to get CPU time via the FS-Cache thread - pool. If this is desired, the op->op.processor should be set to point to - the appropriate processing routine, and fscache_enqueue_retrieval() should - be called at an appropriate point to request CPU time. For instance, the - retrieval routine could be enqueued upon the completion of a disk read. - The to_do field in the retrieval record is provided to aid in this. 
- - If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS - returned if possible or fscache_end_io() called with a suitable error - code. - - fscache_put_retrieval() should be called after a page or pages are dealt - with. This will complete the operation when all pages are dealt with. - - - (*) Request pages be read from cache [mandatory]: - - int (*read_or_alloc_pages)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) - - This is like the read_or_alloc_page() method, except it is handed a list - of pages instead of one page. Any pages on which a read operation is - started must be added to the page cache for the specified mapping and also - to the LRU. Such pages must also be removed from the pages list and - *nr_pages decremented per page. - - If there was an error such as -ENOMEM, then that should be returned; else - if one or more pages couldn't be read or allocated, then -ENOBUFS should - be returned; else if one or more pages couldn't be read, then -ENODATA - should be returned. If all the pages are dispatched then 0 should be - returned. - - - (*) Request page be allocated in the cache [mandatory]: - - int (*allocate_page)(struct fscache_retrieval *op, - struct page *page, - gfp_t gfp) - - This is like the read_or_alloc_page() method, except that it shouldn't - read from the cache, even if there's data there that could be retrieved. - It should, however, set up any internal metadata required such that - the write_page() method can write to the cache. - - If there's no backing block available, then -ENOBUFS should be returned - (or -ENOMEM if there were other problems). If a block is successfully - allocated, then the netfs page should be marked and 0 returned. 
- - - (*) Request pages be allocated in the cache [mandatory]: - - int (*allocate_pages)(struct fscache_retrieval *op, - struct list_head *pages, - unsigned *nr_pages, - gfp_t gfp) - - This is an multiple page version of the allocate_page() method. pages and - nr_pages should be treated as for the read_or_alloc_pages() method. - - - (*) Request page be written to cache [mandatory]: - - int (*write_page)(struct fscache_storage *op, - struct page *page); - - This is called to write from a page on which there was a previously - successful read_or_alloc_page() call or similar. FS-Cache filters out - pages that don't have mappings. - - This method is called asynchronously from the FS-Cache thread pool. It is - not required to actually store anything, provided -ENODATA is then - returned to the next read of this page. - - If an error occurred, then a negative error code should be returned, - otherwise zero should be returned. FS-Cache will take appropriate action - in response to an error, such as withdrawing this object. - - If this method returns success then FS-Cache will inform the netfs - appropriately. - - - (*) Discard retained per-page metadata [mandatory]: - - void (*uncache_page)(struct fscache_object *object, struct page *page) - - This is called when a netfs page is being evicted from the pagecache. The - cache backend should tear down any internal representation or tracking it - maintains for this page. - - -================== -FS-CACHE UTILITIES -================== - -FS-Cache provides some utilities that a cache backend may make use of: - - (*) Note occurrence of an I/O error in a cache: - - void fscache_io_error(struct fscache_cache *cache) - - This tells FS-Cache that an I/O error occurred in the cache. After this - has been called, only resource dissociation operations (object and page - release) will be passed from the netfs to the cache backend for the - specified cache. - - This does not actually withdraw the cache. That must be done separately. 
- - - (*) Invoke the retrieval I/O completion function: - - void fscache_end_io(struct fscache_retrieval *op, struct page *page, - int error); - - This is called to note the end of an attempt to retrieve a page. The - error value should be 0 if successful and an error otherwise. - - - (*) Record that one or more pages being retrieved or allocated have been dealt - with: - - void fscache_retrieval_complete(struct fscache_retrieval *op, - int n_pages); - - This is called to record the fact that one or more pages have been dealt - with and are no longer the concern of this operation. When the number of - pages remaining in the operation reaches 0, the operation will be - completed. - - - (*) Record operation completion: - - void fscache_op_complete(struct fscache_operation *op); - - This is called to record the completion of an operation. This deducts - this operation from the parent object's run state, potentially permitting - one or more pending operations to start running. - - - (*) Set highest store limit: - - void fscache_set_store_limit(struct fscache_object *object, - loff_t i_size); - - This sets the limit FS-Cache imposes on the highest byte it's willing to - try and store for a netfs. Any page over this limit is automatically - rejected by fscache_read_alloc_page() and co with -ENOBUFS. - - - (*) Mark pages as being cached: - - void fscache_mark_pages_cached(struct fscache_retrieval *op, - struct pagevec *pagevec); - - This marks a set of pages as being cached. After this has been called, - the netfs must call fscache_uncache_page() to unmark the pages. - - - (*) Perform coherency check on an object: - - enum fscache_checkaux fscache_check_aux(struct fscache_object *object, - const void *data, - uint16_t datalen); - - This asks the netfs to perform a coherency check on an object that has - just been looked up. The cookie attached to the object will determine the - netfs to use. 
data and datalen should specify where the auxiliary data - retrieved from the cache can be found. - - One of three values will be returned: - - (*) FSCACHE_CHECKAUX_OKAY - - The coherency data indicates the object is valid as is. - - (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - - The coherency data needs updating, but otherwise the object is - valid. - - (*) FSCACHE_CHECKAUX_OBSOLETE - - The coherency data indicates that the object is obsolete and should - be discarded. - - - (*) Initialise a freshly allocated object: - - void fscache_object_init(struct fscache_object *object); - - This initialises all the fields in an object representation. - - - (*) Indicate the destruction of an object: - - void fscache_object_destroyed(struct fscache_cache *cache); - - This must be called to inform FS-Cache that an object that belonged to a - cache has been destroyed and deallocated. This will allow continuation - of the cache withdrawal process when it is stopped pending destruction of - all the objects. - - - (*) Indicate negative lookup on an object: - - void fscache_object_lookup_negative(struct fscache_object *object); - - This is called to indicate to FS-Cache that a lookup process for an object - found a negative result. - - This changes the state of an object to permit reads pending on lookup - completion to go off and start fetching data from the netfs server as it's - known at this point that there can't be any data in the cache. - - This may be called multiple times on an object. Only the first call is - significant - all subsequent calls are ignored. - - - (*) Indicate an object has been obtained: - - void fscache_obtained_object(struct fscache_object *object); - - This is called to indicate to FS-Cache that a lookup process for an object - produced a positive result, or that an object was created. This should - only be called once for any particular object. 
- - This changes the state of an object to indicate: - - (1) if no call to fscache_object_lookup_negative() has been made on - this object, that there may be data available, and that reads can - now go and look for it; and - - (2) that writes may now proceed against this object. - - - (*) Indicate that object lookup failed: - - void fscache_object_lookup_error(struct fscache_object *object); - - This marks an object as having encountered a fatal error (usually EIO) - and causes it to move into a state whereby it will be withdrawn as soon - as possible. - - - (*) Indicate that a stale object was found and discarded: - - void fscache_object_retrying_stale(struct fscache_object *object); - - This is called to indicate that the lookup procedure found an object in - the cache that the netfs decided was stale. The object has been - discarded from the cache and the lookup will be performed again. - - - (*) Indicate that the caching backend killed an object: - - void fscache_object_mark_killed(struct fscache_object *object, - enum fscache_why_object_killed why); - - This is called to indicate that the cache backend preemptively killed an - object. The why parameter should be set to indicate the reason: - - FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding. - FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space - FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished. - FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space. - - - (*) Get and release references on a retrieval record: - - void fscache_get_retrieval(struct fscache_retrieval *op); - void fscache_put_retrieval(struct fscache_retrieval *op); - - These two functions are used to retain a retrieval record while doing - asynchronous data retrieval and block allocation. - - - (*) Enqueue a retrieval record for processing. - - void fscache_enqueue_retrieval(struct fscache_retrieval *op); - - This enqueues a retrieval record for processing by the FS-Cache thread - pool. 
One of the threads in the pool will invoke the retrieval record's - op->op.processor callback function. This function may be called from - within the callback function. - - - (*) List of object state names: - - const char *fscache_object_states[]; - - For debugging purposes, this may be used to turn the state that an object - is in into a text string for display purposes. diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst index dd1297d884d0..70de86922b6a 100644 --- a/Documentation/filesystems/caching/fscache.rst +++ b/Documentation/filesystems/caching/fscache.rst @@ -187,7 +187,7 @@ The netfs API to FS-Cache can be found in: The cache backend API to FS-Cache can be found in: - Documentation/filesystems/caching/backend-api.txt + Documentation/filesystems/caching/backend-api.rst A description of the internal representations and object state machine can be found in: diff --git a/Documentation/filesystems/caching/index.rst b/Documentation/filesystems/caching/index.rst index a2cf35f89e28..033da7ac7c6e 100644 --- a/Documentation/filesystems/caching/index.rst +++ b/Documentation/filesystems/caching/index.rst @@ -8,6 +8,7 @@ Filesystem Caching fscache object + backend-api cachefiles netfs-api operations diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index f78793f3d21e..fcc136361415 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -172,7 +172,7 @@ no_preference: * * Initialise a record of a cache and fill in the name. * - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ void fscache_init_cache(struct fscache_cache *cache, @@ -207,7 +207,7 @@ EXPORT_SYMBOL(fscache_init_cache); * * Add a cache to the system, making it available for netfs's to use. 
* - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ int fscache_add_cache(struct fscache_cache *cache, @@ -307,7 +307,7 @@ EXPORT_SYMBOL(fscache_add_cache); * Note that an I/O error occurred in a cache and that it should no longer be * used for anything. This also reports the error into the kernel log. * - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ void fscache_io_error(struct fscache_cache *cache) @@ -355,7 +355,7 @@ static void fscache_withdraw_all_objects(struct fscache_cache *cache, * Withdraw a cache from service, unbinding all its cache objects from the * netfs cookies they're currently representing. * - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ void fscache_withdraw_cache(struct fscache_cache *cache) diff --git a/fs/fscache/object.c b/fs/fscache/object.c index efaa003b8323..cb2146e02cd5 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -295,7 +295,7 @@ static void fscache_object_work_func(struct work_struct *work) * * Initialise a cache object description to its basic values. * - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ void fscache_object_init(struct fscache_object *object, diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index d5ba431b5d63..ce0b5fbf239d 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -6,7 +6,7 @@ * * NOTE!!! See: * - * Documentation/filesystems/caching/backend-api.txt + * Documentation/filesystems/caching/backend-api.rst * * for a description of the cache backend interface declared here. 
*/ @@ -454,7 +454,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object) * Set the maximum size an object is permitted to reach, implying the highest * byte that may be written. Intended to be called by the attr_changed() op. * - * See Documentation/filesystems/caching/backend-api.txt for a complete + * See Documentation/filesystems/caching/backend-api.rst for a complete * description. */ static inline -- cgit v1.2.3 From 175cc46f4d667a9073270a32f40212d871c18955 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:16:59 +0200 Subject: docs: filesystems: convert cifs/cifsroot.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/28de01ee52283287e4195cf736d7154f122d30d4.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/cifs/cifsroot.rst | 105 ++++++++++++++++++++++++++++ Documentation/filesystems/cifs/cifsroot.txt | 97 ------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 106 insertions(+), 97 deletions(-) create mode 100644 Documentation/filesystems/cifs/cifsroot.rst delete mode 100644 Documentation/filesystems/cifs/cifsroot.txt diff --git a/Documentation/filesystems/cifs/cifsroot.rst b/Documentation/filesystems/cifs/cifsroot.rst new file mode 100644 index 000000000000..4930bb443134 --- /dev/null +++ b/Documentation/filesystems/cifs/cifsroot.rst @@ -0,0 +1,105 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================================== +Mounting root file system via SMB (cifs.ko) +=========================================== + +Written 2019 by Paulo Alcantara + +Written 2019 by Aurelien Aptel + +The CONFIG_CIFS_ROOT option enables experimental root file system +support over the SMB protocol via cifs.ko. 
+ +It introduces a new kernel command-line option called 'cifsroot=' +which will tell the kernel to mount the root file system over the +network by utilizing SMB or CIFS protocol. + +In order to mount, the network stack will also need to be set up by +using 'ip=' config option. For more details, see +Documentation/admin-guide/nfs/nfsroot.rst. + +A CIFS root mount currently requires the use of SMB1+UNIX Extensions +which is only supported by the Samba server. SMB1 is the older +deprecated version of the protocol but it has been extended to support +POSIX features (See [1]). The equivalent extensions for the newer +recommended version of the protocol (SMB3) have not been fully +implemented yet which means SMB3 doesn't support some required POSIX +file system objects (e.g. block devices, pipes, sockets). + +As a result, a CIFS root will default to SMB1 for now but the version +to use can nonetheless be changed via the 'vers=' mount option. This +default will change once the SMB3 POSIX extensions are fully +implemented. + +Server configuration +==================== + +To enable SMB1+UNIX extensions you will need to set these global +settings in Samba smb.conf:: + + [global] + server min protocol = NT1 + unix extension = yes # default + +Kernel command line +=================== + +:: + + root=/dev/cifs + +This is just a virtual device that basically tells the kernel to mount +the root file system via SMB protocol. + +:: + + cifsroot=///[,options] + +Enables the kernel to mount the root file system via SMB that are +located in the and specified in this option. + +The default mount options are set in fs/cifs/cifsroot.c. + +server-ip + IPv4 address of the server. + +share + Path to SMB share (rootfs). + +options + Optional mount options. For more information, see mount.cifs(8). + +Examples +======== + +Export root file system as a Samba share in smb.conf file:: + + ... 
+ [linux] + path = /path/to/rootfs + read only = no + guest ok = yes + force user = root + force group = root + browseable = yes + writeable = yes + admin users = root + public = yes + create mask = 0777 + directory mask = 0777 + ... + +Restart smb service:: + + # systemctl restart smb + +Test it under QEMU on a kernel built with CONFIG_CIFS_ROOT and +CONFIG_IP_PNP options enabled:: + + # qemu-system-x86_64 -enable-kvm -cpu host -m 1024 \ + -kernel /path/to/linux/arch/x86/boot/bzImage -nographic \ + -append "root=/dev/cifs rw ip=dhcp cifsroot=//10.0.2.2/linux,username=foo,password=bar console=ttyS0 3" + + +1: https://wiki.samba.org/index.php/UNIX_Extensions diff --git a/Documentation/filesystems/cifs/cifsroot.txt b/Documentation/filesystems/cifs/cifsroot.txt deleted file mode 100644 index 947b7ec6ce9e..000000000000 --- a/Documentation/filesystems/cifs/cifsroot.txt +++ /dev/null @@ -1,97 +0,0 @@ -Mounting root file system via SMB (cifs.ko) -=========================================== - -Written 2019 by Paulo Alcantara -Written 2019 by Aurelien Aptel - -The CONFIG_CIFS_ROOT option enables experimental root file system -support over the SMB protocol via cifs.ko. - -It introduces a new kernel command-line option called 'cifsroot=' -which will tell the kernel to mount the root file system over the -network by utilizing SMB or CIFS protocol. - -In order to mount, the network stack will also need to be set up by -using 'ip=' config option. For more details, see -Documentation/admin-guide/nfs/nfsroot.rst. - -A CIFS root mount currently requires the use of SMB1+UNIX Extensions -which is only supported by the Samba server. SMB1 is the older -deprecated version of the protocol but it has been extended to support -POSIX features (See [1]). The equivalent extensions for the newer -recommended version of the protocol (SMB3) have not been fully -implemented yet which means SMB3 doesn't support some required POSIX -file system objects (e.g. block devices, pipes, sockets). 
- -As a result, a CIFS root will default to SMB1 for now but the version -to use can nonetheless be changed via the 'vers=' mount option. This -default will change once the SMB3 POSIX extensions are fully -implemented. - -Server configuration -==================== - -To enable SMB1+UNIX extensions you will need to set these global -settings in Samba smb.conf: - - [global] - server min protocol = NT1 - unix extension = yes # default - -Kernel command line -=================== - -root=/dev/cifs - -This is just a virtual device that basically tells the kernel to mount -the root file system via SMB protocol. - -cifsroot=///[,options] - -Enables the kernel to mount the root file system via SMB that are -located in the and specified in this option. - -The default mount options are set in fs/cifs/cifsroot.c. - -server-ip - IPv4 address of the server. - -share - Path to SMB share (rootfs). - -options - Optional mount options. For more information, see mount.cifs(8). - -Examples -======== - -Export root file system as a Samba share in smb.conf file. - -... -[linux] - path = /path/to/rootfs - read only = no - guest ok = yes - force user = root - force group = root - browseable = yes - writeable = yes - admin users = root - public = yes - create mask = 0777 - directory mask = 0777 -... - -Restart smb service. - -# systemctl restart smb - -Test it under QEMU on a kernel built with CONFIG_CIFS_ROOT and -CONFIG_IP_PNP options enabled. - -# qemu-system-x86_64 -enable-kvm -cpu host -m 1024 \ - -kernel /path/to/linux/arch/x86/boot/bzImage -nographic \ - -append "root=/dev/cifs rw ip=dhcp cifsroot=//10.0.2.2/linux,username=foo,password=bar console=ttyS0 3" - - -1: https://wiki.samba.org/index.php/UNIX_Extensions diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 773af390c5e5..d993b33a48c4 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -59,6 +59,7 @@ Documentation for filesystem implementations. 
befs bfs btrfs + cifs/cifsroot ceph cramfs debugfs -- cgit v1.2.3 From 41defb4d0d587ef3fb9b90f3c320b6c41ef68f16 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:00 +0200 Subject: docs: filesystems: convert automount-support.txt to ReST - Add a SPDX header; - Add a document title; - Adjust section titles; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/ba7e2f2bf9aa2c7096772f5e7e8e609cb5fce07c.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/automount-support.rst | 98 +++++++++++++++++++++++++ Documentation/filesystems/automount-support.txt | 93 ----------------------- Documentation/filesystems/index.rst | 2 + 3 files changed, 100 insertions(+), 93 deletions(-) create mode 100644 Documentation/filesystems/automount-support.rst delete mode 100644 Documentation/filesystems/automount-support.txt diff --git a/Documentation/filesystems/automount-support.rst b/Documentation/filesystems/automount-support.rst new file mode 100644 index 000000000000..430f0b40796b --- /dev/null +++ b/Documentation/filesystems/automount-support.rst @@ -0,0 +1,98 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Automount Support +================= + + +Support is available for filesystems that wish to do automounting +support (such as kAFS which can be found in fs/afs/ and NFS in +fs/nfs/). This facility includes allowing in-kernel mounts to be +performed and mountpoint degradation to be requested. The latter can +also be requested by userspace. + + +In-Kernel Automounting +====================== + +See section "Mount Traps" of Documentation/filesystems/autofs.rst + +Then from userspace, you can just do something like:: + + [root@andromeda root]# mount -t afs \#root.afs. 
/afs + [root@andromeda root]# ls /afs + asd cambridge cambridge.redhat.com grand.central.org + [root@andromeda root]# ls /afs/cambridge + afsdoc + [root@andromeda root]# ls /afs/cambridge/afsdoc/ + ChangeLog html LICENSE pdf RELNOTES-1.2.2 + +And then if you look in the mountpoint catalogue, you'll see something like:: + + [root@andromeda root]# cat /proc/mounts + ... + #root.afs. /afs afs rw 0 0 + #root.cell. /afs/cambridge.redhat.com afs rw 0 0 + #afsdoc. /afs/cambridge.redhat.com/afsdoc afs rw 0 0 + + +Automatic Mountpoint Expiry +=========================== + +Automatic expiration of mountpoints is easy, provided you've mounted the +mountpoint to be expired in the automounting procedure outlined separately. + +To do expiration, you need to follow these steps: + + (1) Create at least one list off which the vfsmounts to be expired can be + hung. + + (2) When a new mountpoint is created in the ->d_automount method, add + the mnt to the list using mnt_set_expiry():: + + mnt_set_expiry(newmnt, &afs_vfsmounts); + + (3) When you want mountpoints to be expired, call mark_mounts_for_expiry() + with a pointer to this list. This will process the list, marking every + vfsmount thereon for potential expiry on the next call. + + If a vfsmount was already flagged for expiry, and if its usage count is 1 + (it's only referenced by its parent vfsmount), then it will be deleted + from the namespace and thrown away (effectively unmounted). + + It may prove simplest to simply call this at regular intervals, using + some sort of timed event to drive it. + +The expiration flag is cleared by calls to mntput. This means that expiration +will only happen on the second expiration request after the last time the +mountpoint was accessed. + +If a mountpoint is moved, it gets removed from the expiration list. If a bind +mount is made on an expirable mount, the new vfsmount will not be on the +expiration list and will not expire. 
+ +If a namespace is copied, all mountpoints contained therein will be copied, +and the copies of those that are on an expiration list will be added to the +same expiration list. + + +Userspace Driven Expiry +======================= + +As an alternative, it is possible for userspace to request expiry of any +mountpoint (though some will be rejected - the current process's idea of the +rootfs for example). It does this by passing the MNT_EXPIRE flag to +umount(). This flag is considered incompatible with MNT_FORCE and MNT_DETACH. + +If the mountpoint in question is in referenced by something other than +umount() or its parent mountpoint, an EBUSY error will be returned and the +mountpoint will not be marked for expiration or unmounted. + +If the mountpoint was not already marked for expiry at that time, an EAGAIN +error will be given and it won't be unmounted. + +Otherwise if it was already marked and it wasn't referenced, unmounting will +take place as usual. + +Again, the expiration flag is cleared every time anything other than umount() +looks at a mountpoint. diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt deleted file mode 100644 index 7d9f82607562..000000000000 --- a/Documentation/filesystems/automount-support.txt +++ /dev/null @@ -1,93 +0,0 @@ -Support is available for filesystems that wish to do automounting -support (such as kAFS which can be found in fs/afs/ and NFS in -fs/nfs/). This facility includes allowing in-kernel mounts to be -performed and mountpoint degradation to be requested. The latter can -also be requested by userspace. - - -====================== -IN-KERNEL AUTOMOUNTING -====================== - -See section "Mount Traps" of Documentation/filesystems/autofs.rst - -Then from userspace, you can just do something like: - - [root@andromeda root]# mount -t afs \#root.afs. 
/afs - [root@andromeda root]# ls /afs - asd cambridge cambridge.redhat.com grand.central.org - [root@andromeda root]# ls /afs/cambridge - afsdoc - [root@andromeda root]# ls /afs/cambridge/afsdoc/ - ChangeLog html LICENSE pdf RELNOTES-1.2.2 - -And then if you look in the mountpoint catalogue, you'll see something like: - - [root@andromeda root]# cat /proc/mounts - ... - #root.afs. /afs afs rw 0 0 - #root.cell. /afs/cambridge.redhat.com afs rw 0 0 - #afsdoc. /afs/cambridge.redhat.com/afsdoc afs rw 0 0 - - -=========================== -AUTOMATIC MOUNTPOINT EXPIRY -=========================== - -Automatic expiration of mountpoints is easy, provided you've mounted the -mountpoint to be expired in the automounting procedure outlined separately. - -To do expiration, you need to follow these steps: - - (1) Create at least one list off which the vfsmounts to be expired can be - hung. - - (2) When a new mountpoint is created in the ->d_automount method, add - the mnt to the list using mnt_set_expiry() - mnt_set_expiry(newmnt, &afs_vfsmounts); - - (3) When you want mountpoints to be expired, call mark_mounts_for_expiry() - with a pointer to this list. This will process the list, marking every - vfsmount thereon for potential expiry on the next call. - - If a vfsmount was already flagged for expiry, and if its usage count is 1 - (it's only referenced by its parent vfsmount), then it will be deleted - from the namespace and thrown away (effectively unmounted). - - It may prove simplest to simply call this at regular intervals, using - some sort of timed event to drive it. - -The expiration flag is cleared by calls to mntput. This means that expiration -will only happen on the second expiration request after the last time the -mountpoint was accessed. - -If a mountpoint is moved, it gets removed from the expiration list. If a bind -mount is made on an expirable mount, the new vfsmount will not be on the -expiration list and will not expire. 
- -If a namespace is copied, all mountpoints contained therein will be copied, -and the copies of those that are on an expiration list will be added to the -same expiration list. - - -======================= -USERSPACE DRIVEN EXPIRY -======================= - -As an alternative, it is possible for userspace to request expiry of any -mountpoint (though some will be rejected - the current process's idea of the -rootfs for example). It does this by passing the MNT_EXPIRE flag to -umount(). This flag is considered incompatible with MNT_FORCE and MNT_DETACH. - -If the mountpoint in question is in referenced by something other than -umount() or its parent mountpoint, an EBUSY error will be returned and the -mountpoint will not be marked for expiration or unmounted. - -If the mountpoint was not already marked for expiry at that time, an EAGAIN -error will be given and it won't be unmounted. - -Otherwise if it was already marked and it wasn't referenced, unmounting will -take place as usual. - -Again, the expiration flag is cleared every time anything other than umount() -looks at a mountpoint. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index d993b33a48c4..373d695bac65 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -25,6 +25,8 @@ algorithms work. locking directory-locking + automount-support + caching/index porting -- cgit v1.2.3 From f476c6ed17d49553d40467514b4cb2dd2648e30f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:01 +0200 Subject: docs: filesystems: convert coda.txt to ReST This document has its own style. It seems to be print output for the old matrixial printers where backspace were used to do double prints. For the conversion, I used several regex expressions to get rid of some weird stuff. 
The patch also does almost all possible conversions in order to get a nice output document, while keeping it readable/editable as is: - Add a SPDX header; - Add a document title; - Adjust document title; - Adjust section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Adjust list markups; - Mark some unumbered titles with bold font; - Use footnoote markups; - Add table markups; - Use notes markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/25c06c40c3d7b947a131c3be124ce0e93cc00ae3.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/coda.rst | 1670 ++++++++++++++++++++++++++++++++++ Documentation/filesystems/coda.txt | 1676 ----------------------------------- Documentation/filesystems/index.rst | 1 + MAINTAINERS | 2 +- fs/coda/Kconfig | 2 +- 5 files changed, 1673 insertions(+), 1678 deletions(-) create mode 100644 Documentation/filesystems/coda.rst delete mode 100644 Documentation/filesystems/coda.txt diff --git a/Documentation/filesystems/coda.rst b/Documentation/filesystems/coda.rst new file mode 100644 index 000000000000..84c860c89887 --- /dev/null +++ b/Documentation/filesystems/coda.rst @@ -0,0 +1,1670 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Coda Kernel-Venus Interface +=========================== + +.. Note:: + + This is one of the technical documents describing a component of + Coda -- this document describes the client kernel-Venus interface. + +For more information: + + http://www.coda.cs.cmu.edu + +For user level software needed to run Coda: + + ftp://ftp.coda.cs.cmu.edu + +To run Coda you need to get a user level cache manager for the client, +named Venus, as well as tools to manipulate ACLs, to log in, etc. The +client needs to have the Coda filesystem selected in the kernel +configuration. 
+ +The server needs a user level server and at present does not depend on +kernel support. + + The Venus kernel interface + + Peter J. Braam + + v1.0, Nov 9, 1997 + + This document describes the communication between Venus and kernel + level filesystem code needed for the operation of the Coda file sys- + tem. This document version is meant to describe the current interface + (version 1.0) as well as improvements we envisage. + +.. Table of Contents + + 1. Introduction + + 2. Servicing Coda filesystem calls + + 3. The message layer + + 3.1 Implementation details + + 4. The interface at the call level + + 4.1 Data structures shared by the kernel and Venus + 4.2 The pioctl interface + 4.3 root + 4.4 lookup + 4.5 getattr + 4.6 setattr + 4.7 access + 4.8 create + 4.9 mkdir + 4.10 link + 4.11 symlink + 4.12 remove + 4.13 rmdir + 4.14 readlink + 4.15 open + 4.16 close + 4.17 ioctl + 4.18 rename + 4.19 readdir + 4.20 vget + 4.21 fsync + 4.22 inactive + 4.23 rdwr + 4.24 odymount + 4.25 ody_lookup + 4.26 ody_expand + 4.27 prefetch + 4.28 signal + + 5. The minicache and downcalls + + 5.1 INVALIDATE + 5.2 FLUSH + 5.3 PURGEUSER + 5.4 ZAPFILE + 5.5 ZAPDIR + 5.6 ZAPVNODE + 5.7 PURGEFID + 5.8 REPLACE + + 6. Initialization and cleanup + + 6.1 Requirements + +1. Introduction +=============== + + A key component in the Coda Distributed File System is the cache + manager, Venus. + + When processes on a Coda enabled system access files in the Coda + filesystem, requests are directed at the filesystem layer in the + operating system. The operating system will communicate with Venus to + service the request for the process. Venus manages a persistent + client cache and makes remote procedure calls to Coda file servers and + related servers (such as authentication servers) to service these + requests it receives from the operating system. When Venus has + serviced a request it replies to the operating system with appropriate + return codes, and other data related to the request. 
Optionally the + kernel support for Coda may maintain a minicache of recently processed + requests to limit the number of interactions with Venus. Venus + possesses the facility to inform the kernel when elements from its + minicache are no longer valid. + + This document describes precisely this communication between the + kernel and Venus. The definitions of so called upcalls and downcalls + will be given with the format of the data they handle. We shall also + describe the semantic invariants resulting from the calls. + + Historically Coda was implemented in a BSD file system in Mach 2.6. + The interface between the kernel and Venus is very similar to the BSD + VFS interface. Similar functionality is provided, and the format of + the parameters and returned data is very similar to the BSD VFS. This + leads to an almost natural environment for implementing a kernel-level + filesystem driver for Coda in a BSD system. However, other operating + systems such as Linux and Windows 95 and NT have virtual filesystem + with different interfaces. + + To implement Coda on these systems some reverse engineering of the + Venus/Kernel protocol is necessary. Also it came to light that other + systems could profit significantly from certain small optimizations + and modifications to the protocol. To facilitate this work as well as + to make future ports easier, communication between Venus and the + kernel should be documented in great detail. This is the aim of this + document. + +2. Servicing Coda filesystem calls +=================================== + + The service of a request for a Coda file system service originates in + a process P which accessing a Coda file. It makes a system call which + traps to the OS kernel. Examples of such calls trapping to the kernel + are ``read``, ``write``, ``open``, ``close``, ``create``, ``mkdir``, + ``rmdir``, ``chmod`` in a Unix ontext. Similar calls exist in the Win32 + environment, and are named ``CreateFile``. 
+ + Generally the operating system handles the request in a virtual + filesystem (VFS) layer, which is named I/O Manager in NT and IFS + manager in Windows 95. The VFS is responsible for partial processing + of the request and for locating the specific filesystem(s) which will + service parts of the request. Usually the information in the path + assists in locating the correct FS drivers. Sometimes after extensive + pre-processing, the VFS starts invoking exported routines in the FS + driver. This is the point where the FS specific processing of the + request starts, and here the Coda specific kernel code comes into + play. + + The FS layer for Coda must expose and implement several interfaces. + First and foremost the VFS must be able to make all necessary calls to + the Coda FS layer, so the Coda FS driver must expose the VFS interface + as applicable in the operating system. These differ very significantly + among operating systems, but share features such as facilities to + read/write and create and remove objects. The Coda FS layer services + such VFS requests by invoking one or more well defined services + offered by the cache manager Venus. When the replies from Venus have + come back to the FS driver, servicing of the VFS call continues and + finishes with a reply to the kernel's VFS. Finally the VFS layer + returns to the process. + + As a result of this design a basic interface exposed by the FS driver + must allow Venus to manage message traffic. In particular Venus must + be able to retrieve and place messages and to be notified of the + arrival of a new message. The notification must be through a mechanism + which does not block Venus since Venus must attend to other tasks even + when no messages are waiting or being processed. + + **Interfaces of the Coda FS Driver** + + Furthermore the FS layer provides for a special path of communication + between a user process and Venus, called the pioctl interface. 
The + pioctl interface is used for Coda specific services, such as + requesting detailed information about the persistent cache managed by + Venus. Here the involvement of the kernel is minimal. It identifies + the calling process and passes the information on to Venus. When + Venus replies the response is passed back to the caller in unmodified + form. + + Finally Venus allows the kernel FS driver to cache the results from + certain services. This is done to avoid excessive context switches + and results in an efficient system. However, Venus may acquire + information, for example from the network which implies that cached + information must be flushed or replaced. Venus then makes a downcall + to the Coda FS layer to request flushes or updates in the cache. The + kernel FS driver handles such requests synchronously. + + Among these interfaces the VFS interface and the facility to place, + receive and be notified of messages are platform specific. We will + not go into the calls exported to the VFS layer but we will state the + requirements of the message exchange mechanism. + + +3. The message layer +===================== + + At the lowest level the communication between Venus and the FS driver + proceeds through messages. The synchronization between processes + requesting Coda file service and Venus relies on blocking and waking + up processes. The Coda FS driver processes VFS- and pioctl-requests + on behalf of a process P, creates messages for Venus, awaits replies + and finally returns to the caller. The implementation of the exchange + of messages is platform specific, but the semantics have (so far) + appeared to be generally applicable. Data buffers are created by the + FS Driver in kernel memory on behalf of P and copied to user memory in + Venus. + + The FS Driver while servicing P makes upcalls to Venus. Such an + upcall is dispatched to Venus by creating a message structure. 
The + structure contains the identification of P, the message sequence + number, the size of the request and a pointer to the data in kernel + memory for the request. Since the data buffer is re-used to hold the + reply from Venus, there is a field for the size of the reply. A flags + field is used in the message to precisely record the status of the + message. Additional platform dependent structures involve pointers to + determine the position of the message on queues and pointers to + synchronization objects. In the upcall routine the message structure + is filled in, flags are set to 0, and it is placed on the *pending* + queue. The routine calling upcall is responsible for allocating the + data buffer; its structure will be described in the next section. + + A facility must exist to notify Venus that the message has been + created, and implemented using available synchronization objects in + the OS. This notification is done in the upcall context of the process + P. When the message is on the pending queue, process P cannot proceed + in upcall. The (kernel mode) processing of P in the filesystem + request routine must be suspended until Venus has replied. Therefore + the calling thread in P is blocked in upcall. A pointer in the + message structure will locate the synchronization object on which P is + sleeping. + + Venus detects the notification that a message has arrived, and the FS + driver allow Venus to retrieve the message with a getmsg_from_kernel + call. This action finishes in the kernel by putting the message on the + queue of processing messages and setting flags to READ. Venus is + passed the contents of the data buffer. The getmsg_from_kernel call + now returns and Venus processes the request. + + At some later point the FS driver receives a message from Venus, + namely when Venus calls sendmsg_to_kernel. 
At this moment the Coda FS + driver looks at the contents of the message and decides if: + + + * the message is a reply for a suspended thread P. If so it removes + the message from the processing queue and marks the message as + WRITTEN. Finally, the FS driver unblocks P (still in the kernel + mode context of Venus) and the sendmsg_to_kernel call returns to + Venus. The process P will be scheduled at some point and continues + processing its upcall with the data buffer replaced with the reply + from Venus. + + * The message is a ``downcall``. A downcall is a request from Venus to + the FS Driver. The FS driver processes the request immediately + (usually a cache eviction or replacement) and when it finishes + sendmsg_to_kernel returns. + + Now P awakes and continues processing upcall. There are some + subtleties to take account of. First P will determine if it was woken + up in upcall by a signal from some other source (for example an + attempt to terminate P) or as is normally the case by Venus in its + sendmsg_to_kernel call. In the normal case, the upcall routine will + deallocate the message structure and return. The FS routine can proceed + with its processing. + + + **Sleeping and IPC arrangements** + + In case P is woken up by a signal and not by Venus, it will first look + at the flags field. If the message is not yet READ, the process P can + handle its signal without notifying Venus. If Venus has READ, and + the request should not be processed, P can send Venus a signal message + to indicate that it should disregard the previous message. Such + signals are put in the queue at the head, and read first by Venus. If + the message is already marked as WRITTEN it is too late to stop the + processing. The VFS routine will now continue. 
(-- If a VFS request + involves more than one upcall, this can lead to complicated state, an + extra field "handle_signals" could be added in the message structure + to indicate points of no return have been passed.--) + + + +3.1. Implementation details +---------------------------- + + The Unix implementation of this mechanism has been through the + implementation of a character device associated with Coda. Venus + retrieves messages by doing a read on the device, replies are sent + with a write and notification is through the select system call on the + file descriptor for the device. The process P is kept waiting on an + interruptible wait queue object. + + In Windows NT and the DPMI Windows 95 implementation a DeviceIoControl + call is used. The DeviceIoControl call is designed to copy buffers + from user memory to kernel memory with OPCODES. The sendmsg_to_kernel + is issued as a synchronous call, while the getmsg_from_kernel call is + asynchronous. Windows EventObjects are used for notification of + message arrival. The process P is kept waiting on a KernelEvent + object in NT and a semaphore in Windows 95. + + +4. The interface at the call level +=================================== + + + This section describes the upcalls a Coda FS driver can make to Venus. + Each of these upcalls make use of two structures: inputArgs and + outputArgs. In pseudo BNF form the structures take the following + form:: + + + struct inputArgs { + u_long opcode; + u_long unique; /* Keep multiple outstanding msgs distinct */ + u_short pid; /* Common to all */ + u_short pgid; /* Common to all */ + struct CodaCred cred; /* Common to all */ + + + }; + + struct outputArgs { + u_long opcode; + u_long unique; /* Keep multiple outstanding msgs distinct */ + u_long result; + + + }; + + + + Before going on let us elucidate the role of the various fields. The + inputArgs start with the opcode which defines the type of service + requested from Venus. 
There are approximately 30 upcalls at present + which we will discuss. The unique field labels the inputArg with a + unique number which will identify the message uniquely. A process and + process group id are passed. Finally the credentials of the caller + are included. + + Before delving into the specific calls we need to discuss a variety of + data structures shared by the kernel and Venus. + + + + +4.1. Data structures shared by the kernel and Venus +---------------------------------------------------- + + + The CodaCred structure defines a variety of user and group ids as + they are set for the calling process. The vuid_t and vgid_t are 32 bit + unsigned integers. It also defines group membership in an array. On + Unix the CodaCred has proven sufficient to implement good security + semantics for Coda but the structure may have to undergo modification + for the Windows environment when these mature:: + + struct CodaCred { + vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, effective, set, fs uid */ + vgid_t cr_gid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */ + vgid_t cr_groups[NGROUPS]; /* Group membership for caller */ + }; + + + .. Note:: + + It is questionable if we need CodaCreds in Venus. Finally Venus + doesn't know about groups, although it does create files with the + default uid/gid. Perhaps the list of group membership is superfluous. + + + The next item is the fundamental identifier used to identify Coda + files, the ViceFid. A fid of a file uniquely defines a file or + directory in the Coda filesystem within a cell [1]_:: + + typedef struct ViceFid { + VolumeId Volume; + VnodeId Vnode; + Unique_t Unique; + } ViceFid; + + .. [1] A cell is agroup of Coda servers acting under the aegis of a single + system control machine or SCM. See the Coda Administration manual + for a detailed description of the role of the SCM. + + Each of the constituent fields: VolumeId, VnodeId and Unique_t are + unsigned 32 bit integers. 
We envisage that a further field will need + to be prefixed to identify the Coda cell; this will probably take the + form of a Ipv6 size IP address naming the Coda cell through DNS. + + The next important structure shared between Venus and the kernel is + the attributes of the file. The following structure is used to + exchange information. It has room for future extensions such as + support for device files (currently not present in Coda):: + + + struct coda_timespec { + int64_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ + }; + + struct coda_vattr { + enum coda_vtype va_type; /* vnode type (for create) */ + u_short va_mode; /* files access mode and type */ + short va_nlink; /* number of references to file */ + vuid_t va_uid; /* owner user id */ + vgid_t va_gid; /* owner group id */ + long va_fsid; /* file system id (dev for now) */ + long va_fileid; /* file id */ + u_quad_t va_size; /* file size in bytes */ + long va_blocksize; /* blocksize preferred for i/o */ + struct coda_timespec va_atime; /* time of last access */ + struct coda_timespec va_mtime; /* time of last modification */ + struct coda_timespec va_ctime; /* time file changed */ + u_long va_gen; /* generation number of file */ + u_long va_flags; /* flags defined for file */ + dev_t va_rdev; /* device special file represents */ + u_quad_t va_bytes; /* bytes of disk space held by file */ + u_quad_t va_filerev; /* file modification number */ + u_int va_vaflags; /* operations flags, see below */ + long va_spare; /* remain quad aligned */ + }; + + +4.2. The pioctl interface +-------------------------- + + + Coda specific requests can be made by application through the pioctl + interface. The pioctl is implemented as an ordinary ioctl on a + fictitious file /coda/.CONTROL. The pioctl call opens this file, gets + a file handle and makes the ioctl call. Finally it closes the file. 
+ + The kernel involvement in this is limited to providing the facility to + open and close and pass the ioctl message and to verify that a path in + the pioctl data buffers is a file in a Coda filesystem. + + The kernel is handed a data packet of the form:: + + struct { + const char *path; + struct ViceIoctl vidata; + int follow; + } data; + + + + where:: + + + struct ViceIoctl { + caddr_t in, out; /* Data to be transferred in, or out */ + short in_size; /* Size of input buffer <= 2K */ + short out_size; /* Maximum size of output buffer, <= 2K */ + }; + + + + The path must be a Coda file, otherwise the ioctl upcall will not be + made. + + .. Note:: The data structures and code are a mess. We need to clean this up. + + +**We now proceed to document the individual calls**: + + +4.3. root +---------- + + + Arguments + in + + empty + + out:: + + struct cfs_root_out { + ViceFid VFid; + } cfs_root; + + + + Description + This call is made to Venus during the initialization of + the Coda filesystem. If the result is zero, the cfs_root structure + contains the ViceFid of the root of the Coda filesystem. If a non-zero + result is generated, its value is a platform dependent error code + indicating the difficulty Venus encountered in locating the root of + the Coda filesystem. + +4.4. lookup +------------ + + + Summary + Find the ViceFid and type of an object in a directory if it exists. + + Arguments + in:: + + struct cfs_lookup_in { + ViceFid VFid; + char *name; /* Place holder for data. */ + } cfs_lookup; + + + + out:: + + struct cfs_lookup_out { + ViceFid VFid; + int vtype; + } cfs_lookup; + + + + Description + This call is made to determine the ViceFid and filetype of + a directory entry. The directory entry requested carries name name + and Venus will search the directory identified by cfs_lookup_in.VFid. + The result may indicate that the name does not exist, or that + difficulty was encountered in finding it (e.g. due to disconnection). 
+ If the result is zero, the field cfs_lookup_out.VFid contains the + targets ViceFid and cfs_lookup_out.vtype the coda_vtype giving the + type of object the name designates. + + The name of the object is an 8 bit character string of maximum length + CFS_MAXNAMLEN, currently set to 256 (including a 0 terminator.) + + It is extremely important to realize that Venus bitwise ors the field + cfs_lookup.vtype with CFS_NOCACHE to indicate that the object should + not be put in the kernel name cache. + + .. Note:: + + The type of the vtype is currently wrong. It should be + coda_vtype. Linux does not take note of CFS_NOCACHE. It should. + + +4.5. getattr +------------- + + + Summary Get the attributes of a file. + + Arguments + in:: + + struct cfs_getattr_in { + ViceFid VFid; + struct coda_vattr attr; /* XXXXX */ + } cfs_getattr; + + + + out:: + + struct cfs_getattr_out { + struct coda_vattr attr; + } cfs_getattr; + + + + Description + This call returns the attributes of the file identified by fid. + + Errors + Errors can occur if the object with fid does not exist, is + unaccessible or if the caller does not have permission to fetch + attributes. + + .. Note:: + + Many kernel FS drivers (Linux, NT and Windows 95) need to acquire + the attributes as well as the Fid for the instantiation of an internal + "inode" or "FileHandle". A significant improvement in performance on + such systems could be made by combining the lookup and getattr calls + both at the Venus/kernel interaction level and at the RPC level. + + The vattr structure included in the input arguments is superfluous and + should be removed. + + +4.6. setattr +------------- + + + Summary + Set the attributes of a file. + + Arguments + in:: + + struct cfs_setattr_in { + ViceFid VFid; + struct coda_vattr attr; + } cfs_setattr; + + + + + out + + empty + + Description + The structure attr is filled with attributes to be changed + in BSD style. 
Attributes not to be changed are set to -1, apart from + vtype which is set to VNON. Other are set to the value to be assigned. + The only attributes which the FS driver may request to change are the + mode, owner, groupid, atime, mtime and ctime. The return value + indicates success or failure. + + Errors + A variety of errors can occur. The object may not exist, may + be inaccessible, or permission may not be granted by Venus. + + +4.7. access +------------ + + + Arguments + in:: + + struct cfs_access_in { + ViceFid VFid; + int flags; + } cfs_access; + + + + out + + empty + + Description + Verify if access to the object identified by VFid for + operations described by flags is permitted. The result indicates if + access will be granted. It is important to remember that Coda uses + ACLs to enforce protection and that ultimately the servers, not the + clients enforce the security of the system. The result of this call + will depend on whether a token is held by the user. + + Errors + The object may not exist, or the ACL describing the protection + may not be accessible. + + +4.8. create +------------ + + + Summary + Invoked to create a file + + Arguments + in:: + + struct cfs_create_in { + ViceFid VFid; + struct coda_vattr attr; + int excl; + int mode; + char *name; /* Place holder for data. */ + } cfs_create; + + + + + out:: + + struct cfs_create_out { + ViceFid VFid; + struct coda_vattr attr; + } cfs_create; + + + + Description + This upcall is invoked to request creation of a file. + The file will be created in the directory identified by VFid, its name + will be name, and the mode will be mode. If excl is set an error will + be returned if the file already exists. If the size field in attr is + set to zero the file will be truncated. The uid and gid of the file + are set by converting the CodaCred to a uid using a macro CRTOUID + (this macro is platform dependent). Upon success the VFid and + attributes of the file are returned. 
The Coda FS Driver will normally + instantiate a vnode, inode or file handle at kernel level for the new + object. + + + Errors + A variety of errors can occur. Permissions may be insufficient. + If the object exists and is not a file the error EISDIR is returned + under Unix. + + .. Note:: + + The packing of parameters is very inefficient and appears to + indicate confusion between the system call creat and the VFS operation + create. The VFS operation create is only called to create new objects. + This create call differs from the Unix one in that it is not invoked + to return a file descriptor. The truncate and exclusive options, + together with the mode, could simply be part of the mode as it is + under Unix. There should be no flags argument; this is used in open + (2) to return a file descriptor for READ or WRITE mode. + + The attributes of the directory should be returned too, since the size + and mtime changed. + + +4.9. mkdir +----------- + + + Summary + Create a new directory. + + Arguments + in:: + + struct cfs_mkdir_in { + ViceFid VFid; + struct coda_vattr attr; + char *name; /* Place holder for data. */ + } cfs_mkdir; + + + + out:: + + struct cfs_mkdir_out { + ViceFid VFid; + struct coda_vattr attr; + } cfs_mkdir; + + + + + Description + This call is similar to create but creates a directory. + Only the mode field in the input parameters is used for creation. + Upon successful creation, the attr returned contains the attributes of + the new directory. + + Errors + As for create. + + .. Note:: + + The input parameter should be changed to mode instead of + attributes. + + The attributes of the parent should be returned since the size and + mtime changes. + + +4.10. link +----------- + + + Summary + Create a link to an existing file. + + Arguments + in:: + + struct cfs_link_in { + ViceFid sourceFid; /* cnode to link *to* */ + ViceFid destFid; /* Directory in which to place link */ + char *tname; /* Place holder for data. 
*/ + } cfs_link; + + + + out + + empty + + Description + This call creates a link to the sourceFid in the directory + identified by destFid with name tname. The source must reside in the + target's parent, i.e. the source must be have parent destFid, i.e. Coda + does not support cross directory hard links. Only the return value is + relevant. It indicates success or the type of failure. + + Errors + The usual errors can occur. + + +4.11. symlink +-------------- + + + Summary + create a symbolic link + + Arguments + in:: + + struct cfs_symlink_in { + ViceFid VFid; /* Directory to put symlink in */ + char *srcname; + struct coda_vattr attr; + char *tname; + } cfs_symlink; + + + + out + + none + + Description + Create a symbolic link. The link is to be placed in the + directory identified by VFid and named tname. It should point to the + pathname srcname. The attributes of the newly created object are to + be set to attr. + + .. Note:: + + The attributes of the target directory should be returned since + its size changed. + + +4.12. remove +------------- + + + Summary + Remove a file + + Arguments + in:: + + struct cfs_remove_in { + ViceFid VFid; + char *name; /* Place holder for data. */ + } cfs_remove; + + + + out + + none + + Description + Remove file named cfs_remove_in.name in directory + identified by VFid. + + + .. Note:: + + The attributes of the directory should be returned since its + mtime and size may change. + + +4.13. rmdir +------------ + + + Summary + Remove a directory + + Arguments + in:: + + struct cfs_rmdir_in { + ViceFid VFid; + char *name; /* Place holder for data. */ + } cfs_rmdir; + + + + out + + none + + Description + Remove the directory with name name from the directory + identified by VFid. + + .. Note:: The attributes of the parent directory should be returned since + its mtime and size may change. + + +4.14. readlink +--------------- + + + Summary + Read the value of a symbolic link. 
+ + Arguments + in:: + + struct cfs_readlink_in { + ViceFid VFid; + } cfs_readlink; + + + + out:: + + struct cfs_readlink_out { + int count; + caddr_t data; /* Place holder for data. */ + } cfs_readlink; + + + + Description + This routine reads the contents of symbolic link + identified by VFid into the buffer data. The buffer data must be able + to hold any name up to CFS_MAXNAMLEN (PATH or NAM??). + + Errors + No unusual errors. + + +4.15. open +----------- + + + Summary + Open a file. + + Arguments + in:: + + struct cfs_open_in { + ViceFid VFid; + int flags; + } cfs_open; + + + + out:: + + struct cfs_open_out { + dev_t dev; + ino_t inode; + } cfs_open; + + + + Description + This request asks Venus to place the file identified by + VFid in its cache and to note that the calling process wishes to open + it with flags as in open(2). The return value to the kernel differs + for Unix and Windows systems. For Unix systems the Coda FS Driver is + informed of the device and inode number of the container file in the + fields dev and inode. For Windows the path of the container file is + returned to the kernel. + + + .. Note:: + + Currently the cfs_open_out structure is not properly adapted to + deal with the Windows case. It might be best to implement two + upcalls, one to open aiming at a container file name, the other at a + container file inode. + + +4.16. close +------------ + + + Summary + Close a file, update it on the servers. + + Arguments + in:: + + struct cfs_close_in { + ViceFid VFid; + int flags; + } cfs_close; + + + + out + + none + + Description + Close the file identified by VFid. + + .. Note:: + + The flags argument is bogus and not used. However, Venus' code + has room to deal with an execp input field, probably this field should + be used to inform Venus that the file was closed but is still memory + mapped for execution. There are comments about fetching versus not + fetching the data in Venus vproc_vfscalls. This seems silly. 
If a + file is being closed, the data in the container file is to be the new + data. Here again the execp flag might be in play to create confusion: + currently Venus might think a file can be flushed from the cache when + it is still memory mapped. This needs to be understood. + + +4.17. ioctl +------------ + + + Summary + Do an ioctl on a file. This includes the pioctl interface. + + Arguments + in:: + + struct cfs_ioctl_in { + ViceFid VFid; + int cmd; + int len; + int rwflag; + char *data; /* Place holder for data. */ + } cfs_ioctl; + + + + out:: + + + struct cfs_ioctl_out { + int len; + caddr_t data; /* Place holder for data. */ + } cfs_ioctl; + + + + Description + Do an ioctl operation on a file. The command, len and + data arguments are filled as usual. flags is not used by Venus. + + .. Note:: + + Another bogus parameter. flags is not used. What is the + business about PREFETCHING in the Venus code? + + + +4.18. rename +------------- + + + Summary + Rename a fid. + + Arguments + in:: + + struct cfs_rename_in { + ViceFid sourceFid; + char *srcname; + ViceFid destFid; + char *destname; + } cfs_rename; + + + + out + + none + + Description + Rename the object with name srcname in directory + sourceFid to destname in destFid. It is important that the names + srcname and destname are 0 terminated strings. Strings in Unix + kernels are not always null terminated. + + +4.19. readdir +-------------- + + + Summary + Read directory entries. + + Arguments + in:: + + struct cfs_readdir_in { + ViceFid VFid; + int count; + int offset; + } cfs_readdir; + + + + + out:: + + struct cfs_readdir_out { + int size; + caddr_t data; /* Place holder for data. */ + } cfs_readdir; + + + + Description + Read directory entries from VFid starting at offset and + read at most count bytes. Returns the data in data and returns + the size in size. + + + .. Note:: + + This call is not used. Readdir operations exploit container + files. 
We will re-evaluate this during the directory revamp which is + about to take place. + + +4.20. vget +----------- + + + Summary + instructs Venus to do an FSDB->Get. + + Arguments + in:: + + struct cfs_vget_in { + ViceFid VFid; + } cfs_vget; + + + + out:: + + struct cfs_vget_out { + ViceFid VFid; + int vtype; + } cfs_vget; + + + + Description + This upcall asks Venus to do a get operation on an fsobj + labelled by VFid. + + .. Note:: + + This operation is not used. However, it is extremely useful + since it can be used to deal with read/write memory mapped files. + These can be "pinned" in the Venus cache using vget and released with + inactive. + + +4.21. fsync +------------ + + + Summary + Tell Venus to update the RVM attributes of a file. + + Arguments + in:: + + struct cfs_fsync_in { + ViceFid VFid; + } cfs_fsync; + + + + out + + none + + Description + Ask Venus to update RVM attributes of object VFid. This + should be called as part of kernel level fsync type calls. The + result indicates if the syncing was successful. + + .. Note:: Linux does not implement this call. It should. + + +4.22. inactive +--------------- + + + Summary + Tell Venus a vnode is no longer in use. + + Arguments + in:: + + struct cfs_inactive_in { + ViceFid VFid; + } cfs_inactive; + + + + out + + none + + Description + This operation returns EOPNOTSUPP. + + .. Note:: This should perhaps be removed. + + +4.23. rdwr +----------- + + + Summary + Read or write from a file + + Arguments + in:: + + struct cfs_rdwr_in { + ViceFid VFid; + int rwflag; + int count; + int offset; + int ioflag; + caddr_t data; /* Place holder for data. */ + } cfs_rdwr; + + + + + out:: + + struct cfs_rdwr_out { + int rwflag; + int count; + caddr_t data; /* Place holder for data. */ + } cfs_rdwr; + + + + Description + This upcall asks Venus to read or write from a file. + + + .. Note:: + + It should be removed since it is against the Coda philosophy that + read/write operations never reach Venus. 
I have been told the + operation does not work. It is not currently used. + + + +4.24. odymount +--------------- + + + Summary + Allows mounting multiple Coda "filesystems" on one Unix mount point. + + Arguments + in:: + + struct ody_mount_in { + char *name; /* Place holder for data. */ + } ody_mount; + + + + out:: + + struct ody_mount_out { + ViceFid VFid; + } ody_mount; + + + + Description + Asks Venus to return the rootfid of a Coda system named + name. The fid is returned in VFid. + + .. Note:: + + This call was used by David for dynamic sets. It should be + removed since it causes a jungle of pointers in the VFS mounting area. + It is not used by Coda proper. Call is not implemented by Venus. + + +4.25. ody_lookup +----------------- + + + Summary + Looks up something. + + Arguments + in + + irrelevant + + + out + + irrelevant + + + .. Note:: Gut it. Call is not implemented by Venus. + + +4.26. ody_expand +----------------- + + + Summary + expands something in a dynamic set. + + Arguments + in + + irrelevant + + out + + irrelevant + + .. Note:: Gut it. Call is not implemented by Venus. + + +4.27. prefetch +--------------- + + + Summary + Prefetch a dynamic set. + + Arguments + + in + + Not documented. + + out + + Not documented. + + Description + Venus worker.cc has support for this call, although it is + noted that it doesn't work. Not surprising, since the kernel does not + have support for it. (ODY_PREFETCH is not a defined operation). + + + .. Note:: Gut it. It isn't working and isn't used by Coda. + + + +4.28. signal +------------- + + + Summary + Send Venus a signal about an upcall. + + Arguments + in + + none + + out + + not applicable. + + Description + This is an out-of-band upcall to Venus to inform Venus + that the calling process received a signal after Venus read the + message from the input queue. Venus is supposed to clean up the + operation. + + Errors + No reply is given. + + .. 
Note:: + + We need to better understand what Venus needs to clean up and if + it is doing this correctly. Also we need to handle multiple upcall + per system call situations correctly. It would be important to know + what state changes in Venus take place after an upcall for which the + kernel is responsible for notifying Venus to clean up (e.g. open + definitely is such a state change, but many others are maybe not). + + +5. The minicache and downcalls +=============================== + + + The Coda FS Driver can cache results of lookup and access upcalls, to + limit the frequency of upcalls. Upcalls carry a price since a process + context switch needs to take place. The counterpart of caching the + information is that Venus will notify the FS Driver that cached + entries must be flushed or renamed. + + The kernel code generally has to maintain a structure which links the + internal file handles (called vnodes in BSD, inodes in Linux and + FileHandles in Windows) with the ViceFid's which Venus maintains. The + reason is that frequent translations back and forth are needed in + order to make upcalls and use the results of upcalls. Such linking + objects are called cnodes. + + The current minicache implementations have cache entries which record + the following: + + 1. the name of the file + + 2. the cnode of the directory containing the object + + 3. a list of CodaCred's for which the lookup is permitted. + + 4. the cnode of the object + + The lookup call in the Coda FS Driver may request the cnode of the + desired object from the cache, by passing its name, directory and the + CodaCred's of the caller. The cache will return the cnode or indicate + that it cannot be found. The Coda FS Driver must be careful to + invalidate cache entries when it modifies or removes objects. + + When Venus obtains information that indicates that cache entries are + no longer valid, it will make a downcall to the kernel. 
Downcalls are + intercepted by the Coda FS Driver and lead to cache invalidations of + the kind described below. The Coda FS Driver does not return an error + unless the downcall data could not be read into kernel memory. + + +5.1. INVALIDATE +---------------- + + + No information is available on this call. + + +5.2. FLUSH +----------- + + + + Arguments + None + + Summary + Flush the name cache entirely. + + Description + Venus issues this call upon startup and when it dies. This + is to prevent stale cache information being held. Some operating + systems allow the kernel name cache to be switched off dynamically. + When this is done, this downcall is made. + + +5.3. PURGEUSER +--------------- + + + Arguments + :: + + struct cfs_purgeuser_out {/* CFS_PURGEUSER is a venus->kernel call */ + struct CodaCred cred; + } cfs_purgeuser; + + + + Description + Remove all entries in the cache carrying the Cred. This + call is issued when tokens for a user expire or are flushed. + + +5.4. ZAPFILE +------------- + + + Arguments + :: + + struct cfs_zapfile_out { /* CFS_ZAPFILE is a venus->kernel call */ + ViceFid CodaFid; + } cfs_zapfile; + + + + Description + Remove all entries which have the (dir vnode, name) pair. + This is issued as a result of an invalidation of cached attributes of + a vnode. + + .. Note:: + + Call is not named correctly in NetBSD and Mach. The minicache + zapfile routine takes different arguments. Linux does not implement + the invalidation of attributes correctly. + + + +5.5. ZAPDIR +------------ + + + Arguments + :: + + struct cfs_zapdir_out { /* CFS_ZAPDIR is a venus->kernel call */ + ViceFid CodaFid; + } cfs_zapdir; + + + + Description + Remove all entries in the cache lying in a directory + CodaFid, and all children of this directory. This call is issued when + Venus receives a callback on the directory. + + +5.6. 
ZAPVNODE +-------------- + + + + Arguments + :: + + struct cfs_zapvnode_out { /* CFS_ZAPVNODE is a venus->kernel call */ + struct CodaCred cred; + ViceFid VFid; + } cfs_zapvnode; + + + + Description + Remove all entries in the cache carrying the cred and VFid + as in the arguments. This downcall is probably never issued. + + +5.7. PURGEFID +-------------- + + + Arguments + :: + + struct cfs_purgefid_out { /* CFS_PURGEFID is a venus->kernel call */ + ViceFid CodaFid; + } cfs_purgefid; + + + + Description + Flush the attribute for the file. If it is a dir (odd + vnode), purge its children from the namecache and remove the file from the + namecache. + + + +5.8. REPLACE +------------- + + + Summary + Replace the Fid's for a collection of names. + + Arguments + :: + + struct cfs_replace_out { /* cfs_replace is a venus->kernel call */ + ViceFid NewFid; + ViceFid OldFid; + } cfs_replace; + + + + Description + This routine replaces a ViceFid in the name cache with + another. It is added to allow Venus during reintegration to replace + locally allocated temp fids while disconnected with global fids even + when the reference counts on those fids are not zero. + + +6. Initialization and cleanup +============================== + + + This section gives brief hints as to desirable features for the Coda + FS Driver at startup and upon shutdown or Venus failures. Before + entering the discussion it is useful to repeat that the Coda FS Driver + maintains the following data: + + + 1. message queues + + 2. cnodes + + 3. name cache entries + + The name cache entries are entirely private to the driver, so they + can easily be manipulated. The message queues will generally have + clear points of initialization and destruction. The cnodes are + much more delicate. User processes hold reference counts in Coda + filesystems and it can be difficult to clean up the cnodes. + + It can expect requests through: + + 1. the message subsystem + + 2. the VFS layer + + 3. 
pioctl interface + + Currently the pioctl passes through the VFS for Coda so we can + treat these similarly. + + +6.1. Requirements +------------------ + + + The following requirements should be accommodated: + + 1. The message queues should have open and close routines. On Unix + the opening of the character devices are such routines. + + - Before opening, no messages can be placed. + + - Opening will remove any old messages still pending. + + - Close will notify any sleeping processes that their upcall cannot + be completed. + + - Close will free all memory allocated by the message queues. + + + 2. At open the namecache shall be initialized to empty state. + + 3. Before the message queues are open, all VFS operations will fail. + Fortunately this can be achieved by making sure than mounting the + Coda filesystem cannot succeed before opening. + + 4. After closing of the queues, no VFS operations can succeed. Here + one needs to be careful, since a few operations (lookup, + read/write, readdir) can proceed without upcalls. These must be + explicitly blocked. + + 5. Upon closing the namecache shall be flushed and disabled. + + 6. All memory held by cnodes can be freed without relying on upcalls. + + 7. Unmounting the file system can be done without relying on upcalls. + + 8. Mounting the Coda filesystem should fail gracefully if Venus cannot + get the rootfid or the attributes of the rootfid. The latter is + best implemented by Venus fetching these objects before attempting + to mount. + + .. Note:: + + NetBSD in particular but also Linux have not implemented the + above requirements fully. For smooth operation this needs to be + corrected. 
+ + + diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt deleted file mode 100644 index 1711ad48e38a..000000000000 --- a/Documentation/filesystems/coda.txt +++ /dev/null @@ -1,1676 +0,0 @@ -NOTE: -This is one of the technical documents describing a component of -Coda -- this document describes the client kernel-Venus interface. - -For more information: - http://www.coda.cs.cmu.edu -For user level software needed to run Coda: - ftp://ftp.coda.cs.cmu.edu - -To run Coda you need to get a user level cache manager for the client, -named Venus, as well as tools to manipulate ACLs, to log in, etc. The -client needs to have the Coda filesystem selected in the kernel -configuration. - -The server needs a user level server and at present does not depend on -kernel support. - - - - - - - - The Venus kernel interface - Peter J. Braam - v1.0, Nov 9, 1997 - - This document describes the communication between Venus and kernel - level filesystem code needed for the operation of the Coda file sys- - tem. This document version is meant to describe the current interface - (version 1.0) as well as improvements we envisage. - ______________________________________________________________________ - - Table of Contents - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1. Introduction - - 2. Servicing Coda filesystem calls - - 3. The message layer - - 3.1 Implementation details - - 4. The interface at the call level - - 4.1 Data structures shared by the kernel and Venus - 4.2 The pioctl interface - 4.3 root - 4.4 lookup - 4.5 getattr - 4.6 setattr - 4.7 access - 4.8 create - 4.9 mkdir - 4.10 link - 4.11 symlink - 4.12 remove - 4.13 rmdir - 4.14 readlink - 4.15 open - 4.16 close - 4.17 ioctl - 4.18 rename - 4.19 readdir - 4.20 vget - 4.21 fsync - 4.22 inactive - 4.23 rdwr - 4.24 odymount - 4.25 ody_lookup - 4.26 ody_expand - 4.27 prefetch - 4.28 signal - - 5. 
The minicache and downcalls - - 5.1 INVALIDATE - 5.2 FLUSH - 5.3 PURGEUSER - 5.4 ZAPFILE - 5.5 ZAPDIR - 5.6 ZAPVNODE - 5.7 PURGEFID - 5.8 REPLACE - - 6. Initialization and cleanup - - 6.1 Requirements - - - ______________________________________________________________________ - 0wpage - - 11.. IInnttrroodduuccttiioonn - - - - A key component in the Coda Distributed File System is the cache - manager, _V_e_n_u_s. - - - When processes on a Coda enabled system access files in the Coda - filesystem, requests are directed at the filesystem layer in the - operating system. The operating system will communicate with Venus to - service the request for the process. Venus manages a persistent - client cache and makes remote procedure calls to Coda file servers and - related servers (such as authentication servers) to service these - requests it receives from the operating system. When Venus has - serviced a request it replies to the operating system with appropriate - return codes, and other data related to the request. Optionally the - kernel support for Coda may maintain a minicache of recently processed - requests to limit the number of interactions with Venus. Venus - possesses the facility to inform the kernel when elements from its - minicache are no longer valid. - - This document describes precisely this communication between the - kernel and Venus. The definitions of so called upcalls and downcalls - will be given with the format of the data they handle. We shall also - describe the semantic invariants resulting from the calls. - - Historically Coda was implemented in a BSD file system in Mach 2.6. - The interface between the kernel and Venus is very similar to the BSD - VFS interface. Similar functionality is provided, and the format of - the parameters and returned data is very similar to the BSD VFS. This - leads to an almost natural environment for implementing a kernel-level - filesystem driver for Coda in a BSD system. 
However, other operating - systems such as Linux and Windows 95 and NT have virtual filesystem - with different interfaces. - - To implement Coda on these systems some reverse engineering of the - Venus/Kernel protocol is necessary. Also it came to light that other - systems could profit significantly from certain small optimizations - and modifications to the protocol. To facilitate this work as well as - to make future ports easier, communication between Venus and the - kernel should be documented in great detail. This is the aim of this - document. - - 0wpage - - 22.. SSeerrvviicciinngg CCooddaa ffiilleessyysstteemm ccaallllss - - The service of a request for a Coda file system service originates in - a process PP which accessing a Coda file. It makes a system call which - traps to the OS kernel. Examples of such calls trapping to the kernel - are _r_e_a_d_, _w_r_i_t_e_, _o_p_e_n_, _c_l_o_s_e_, _c_r_e_a_t_e_, _m_k_d_i_r_, _r_m_d_i_r_, _c_h_m_o_d in a Unix - context. Similar calls exist in the Win32 environment, and are named - _C_r_e_a_t_e_F_i_l_e_, . - - Generally the operating system handles the request in a virtual - filesystem (VFS) layer, which is named I/O Manager in NT and IFS - manager in Windows 95. The VFS is responsible for partial processing - of the request and for locating the specific filesystem(s) which will - service parts of the request. Usually the information in the path - assists in locating the correct FS drivers. Sometimes after extensive - pre-processing, the VFS starts invoking exported routines in the FS - driver. This is the point where the FS specific processing of the - request starts, and here the Coda specific kernel code comes into - play. - - The FS layer for Coda must expose and implement several interfaces. - First and foremost the VFS must be able to make all necessary calls to - the Coda FS layer, so the Coda FS driver must expose the VFS interface - as applicable in the operating system. 
These differ very significantly - among operating systems, but share features such as facilities to - read/write and create and remove objects. The Coda FS layer services - such VFS requests by invoking one or more well defined services - offered by the cache manager Venus. When the replies from Venus have - come back to the FS driver, servicing of the VFS call continues and - finishes with a reply to the kernel's VFS. Finally the VFS layer - returns to the process. - - As a result of this design a basic interface exposed by the FS driver - must allow Venus to manage message traffic. In particular Venus must - be able to retrieve and place messages and to be notified of the - arrival of a new message. The notification must be through a mechanism - which does not block Venus since Venus must attend to other tasks even - when no messages are waiting or being processed. - - - - - - - Interfaces of the Coda FS Driver - - Furthermore the FS layer provides for a special path of communication - between a user process and Venus, called the pioctl interface. The - pioctl interface is used for Coda specific services, such as - requesting detailed information about the persistent cache managed by - Venus. Here the involvement of the kernel is minimal. It identifies - the calling process and passes the information on to Venus. When - Venus replies the response is passed back to the caller in unmodified - form. - - Finally Venus allows the kernel FS driver to cache the results from - certain services. This is done to avoid excessive context switches - and results in an efficient system. However, Venus may acquire - information, for example from the network which implies that cached - information must be flushed or replaced. Venus then makes a downcall - to the Coda FS layer to request flushes or updates in the cache. The - kernel FS driver handles such requests synchronously. 
- - Among these interfaces the VFS interface and the facility to place, - receive and be notified of messages are platform specific. We will - not go into the calls exported to the VFS layer but we will state the - requirements of the message exchange mechanism. - - 0wpage - - 33.. TThhee mmeessssaaggee llaayyeerr - - - - At the lowest level the communication between Venus and the FS driver - proceeds through messages. The synchronization between processes - requesting Coda file service and Venus relies on blocking and waking - up processes. The Coda FS driver processes VFS- and pioctl-requests - on behalf of a process P, creates messages for Venus, awaits replies - and finally returns to the caller. The implementation of the exchange - of messages is platform specific, but the semantics have (so far) - appeared to be generally applicable. Data buffers are created by the - FS Driver in kernel memory on behalf of P and copied to user memory in - Venus. - - The FS Driver while servicing P makes upcalls to Venus. Such an - upcall is dispatched to Venus by creating a message structure. The - structure contains the identification of P, the message sequence - number, the size of the request and a pointer to the data in kernel - memory for the request. Since the data buffer is re-used to hold the - reply from Venus, there is a field for the size of the reply. A flags - field is used in the message to precisely record the status of the - message. Additional platform dependent structures involve pointers to - determine the position of the message on queues and pointers to - synchronization objects. In the upcall routine the message structure - is filled in, flags are set to 0, and it is placed on the _p_e_n_d_i_n_g - queue. The routine calling upcall is responsible for allocating the - data buffer; its structure will be described in the next section. 
- - A facility must exist to notify Venus that the message has been - created, and implemented using available synchronization objects in - the OS. This notification is done in the upcall context of the process - P. When the message is on the pending queue, process P cannot proceed - in upcall. The (kernel mode) processing of P in the filesystem - request routine must be suspended until Venus has replied. Therefore - the calling thread in P is blocked in upcall. A pointer in the - message structure will locate the synchronization object on which P is - sleeping. - - Venus detects the notification that a message has arrived, and the FS - driver allow Venus to retrieve the message with a getmsg_from_kernel - call. This action finishes in the kernel by putting the message on the - queue of processing messages and setting flags to READ. Venus is - passed the contents of the data buffer. The getmsg_from_kernel call - now returns and Venus processes the request. - - At some later point the FS driver receives a message from Venus, - namely when Venus calls sendmsg_to_kernel. At this moment the Coda FS - driver looks at the contents of the message and decides if: - - - +o the message is a reply for a suspended thread P. If so it removes - the message from the processing queue and marks the message as - WRITTEN. Finally, the FS driver unblocks P (still in the kernel - mode context of Venus) and the sendmsg_to_kernel call returns to - Venus. The process P will be scheduled at some point and continues - processing its upcall with the data buffer replaced with the reply - from Venus. - - +o The message is a _d_o_w_n_c_a_l_l. A downcall is a request from Venus to - the FS Driver. The FS driver processes the request immediately - (usually a cache eviction or replacement) and when it finishes - sendmsg_to_kernel returns. - - Now P awakes and continues processing upcall. There are some - subtleties to take account of. 
First P will determine if it was woken - up in upcall by a signal from some other source (for example an - attempt to terminate P) or as is normally the case by Venus in its - sendmsg_to_kernel call. In the normal case, the upcall routine will - deallocate the message structure and return. The FS routine can proceed - with its processing. - - - - - - - - Sleeping and IPC arrangements - - In case P is woken up by a signal and not by Venus, it will first look - at the flags field. If the message is not yet READ, the process P can - handle its signal without notifying Venus. If Venus has READ, and - the request should not be processed, P can send Venus a signal message - to indicate that it should disregard the previous message. Such - signals are put in the queue at the head, and read first by Venus. If - the message is already marked as WRITTEN it is too late to stop the - processing. The VFS routine will now continue. (-- If a VFS request - involves more than one upcall, this can lead to complicated state, an - extra field "handle_signals" could be added in the message structure - to indicate points of no return have been passed.--) - - - - 33..11.. IImmpplleemmeennttaattiioonn ddeettaaiillss - - The Unix implementation of this mechanism has been through the - implementation of a character device associated with Coda. Venus - retrieves messages by doing a read on the device, replies are sent - with a write and notification is through the select system call on the - file descriptor for the device. The process P is kept waiting on an - interruptible wait queue object. - - In Windows NT and the DPMI Windows 95 implementation a DeviceIoControl - call is used. The DeviceIoControl call is designed to copy buffers - from user memory to kernel memory with OPCODES. The sendmsg_to_kernel - is issued as a synchronous call, while the getmsg_from_kernel call is - asynchronous. Windows EventObjects are used for notification of - message arrival. 
The process P is kept waiting on a KernelEvent - object in NT and a semaphore in Windows 95. - - 0wpage - - 44.. TThhee iinntteerrffaaccee aatt tthhee ccaallll lleevveell - - - This section describes the upcalls a Coda FS driver can make to Venus. - Each of these upcalls make use of two structures: inputArgs and - outputArgs. In pseudo BNF form the structures take the following - form: - - - struct inputArgs { - u_long opcode; - u_long unique; /* Keep multiple outstanding msgs distinct */ - u_short pid; /* Common to all */ - u_short pgid; /* Common to all */ - struct CodaCred cred; /* Common to all */ - - - }; - - struct outputArgs { - u_long opcode; - u_long unique; /* Keep multiple outstanding msgs distinct */ - u_long result; - - - }; - - - - Before going on let us elucidate the role of the various fields. The - inputArgs start with the opcode which defines the type of service - requested from Venus. There are approximately 30 upcalls at present - which we will discuss. The unique field labels the inputArg with a - unique number which will identify the message uniquely. A process and - process group id are passed. Finally the credentials of the caller - are included. - - Before delving into the specific calls we need to discuss a variety of - data structures shared by the kernel and Venus. - - - - - 44..11.. DDaattaa ssttrruuccttuurreess sshhaarreedd bbyy tthhee kkeerrnneell aanndd VVeennuuss - - - The CodaCred structure defines a variety of user and group ids as - they are set for the calling process. The vuid_t and vgid_t are 32 bit - unsigned integers. It also defines group membership in an array. On - Unix the CodaCred has proven sufficient to implement good security - semantics for Coda but the structure may have to undergo modification - for the Windows environment when these mature. 
- - struct CodaCred { - vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, effective, set, fs uid */ - vgid_t cr_gid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */ - vgid_t cr_groups[NGROUPS]; /* Group membership for caller */ - }; - - - - NNOOTTEE It is questionable if we need CodaCreds in Venus. Finally Venus - doesn't know about groups, although it does create files with the - default uid/gid. Perhaps the list of group membership is superfluous. - - - The next item is the fundamental identifier used to identify Coda - files, the ViceFid. A fid of a file uniquely defines a file or - directory in the Coda filesystem within a _c_e_l_l. (-- A _c_e_l_l is a - group of Coda servers acting under the aegis of a single system - control machine or SCM. See the Coda Administration manual for a - detailed description of the role of the SCM.--) - - - typedef struct ViceFid { - VolumeId Volume; - VnodeId Vnode; - Unique_t Unique; - } ViceFid; - - - - Each of the constituent fields: VolumeId, VnodeId and Unique_t are - unsigned 32 bit integers. We envisage that a further field will need - to be prefixed to identify the Coda cell; this will probably take the - form of a Ipv6 size IP address naming the Coda cell through DNS. - - The next important structure shared between Venus and the kernel is - the attributes of the file. The following structure is used to - exchange information. It has room for future extensions such as - support for device files (currently not present in Coda). 
- - - - - - - - - - - - - - - - - struct coda_timespec { - int64_t tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ - }; - - struct coda_vattr { - enum coda_vtype va_type; /* vnode type (for create) */ - u_short va_mode; /* files access mode and type */ - short va_nlink; /* number of references to file */ - vuid_t va_uid; /* owner user id */ - vgid_t va_gid; /* owner group id */ - long va_fsid; /* file system id (dev for now) */ - long va_fileid; /* file id */ - u_quad_t va_size; /* file size in bytes */ - long va_blocksize; /* blocksize preferred for i/o */ - struct coda_timespec va_atime; /* time of last access */ - struct coda_timespec va_mtime; /* time of last modification */ - struct coda_timespec va_ctime; /* time file changed */ - u_long va_gen; /* generation number of file */ - u_long va_flags; /* flags defined for file */ - dev_t va_rdev; /* device special file represents */ - u_quad_t va_bytes; /* bytes of disk space held by file */ - u_quad_t va_filerev; /* file modification number */ - u_int va_vaflags; /* operations flags, see below */ - long va_spare; /* remain quad aligned */ - }; - - - - - 44..22.. TThhee ppiiooccttll iinntteerrffaaccee - - - Coda specific requests can be made by application through the pioctl - interface. The pioctl is implemented as an ordinary ioctl on a - fictitious file /coda/.CONTROL. The pioctl call opens this file, gets - a file handle and makes the ioctl call. Finally it closes the file. - - The kernel involvement in this is limited to providing the facility to - open and close and pass the ioctl message _a_n_d to verify that a path in - the pioctl data buffers is a file in a Coda filesystem. 
- - The kernel is handed a data packet of the form: - - struct { - const char *path; - struct ViceIoctl vidata; - int follow; - } data; - - - - where - - - struct ViceIoctl { - caddr_t in, out; /* Data to be transferred in, or out */ - short in_size; /* Size of input buffer <= 2K */ - short out_size; /* Maximum size of output buffer, <= 2K */ - }; - - - - The path must be a Coda file, otherwise the ioctl upcall will not be - made. - - NNOOTTEE The data structures and code are a mess. We need to clean this - up. - - We now proceed to document the individual calls: - - 0wpage - - 44..33.. rroooott - - - AArrgguummeennttss - - iinn empty - - oouutt - - struct cfs_root_out { - ViceFid VFid; - } cfs_root; - - - - DDeessccrriippttiioonn This call is made to Venus during the initialization of - the Coda filesystem. If the result is zero, the cfs_root structure - contains the ViceFid of the root of the Coda filesystem. If a non-zero - result is generated, its value is a platform dependent error code - indicating the difficulty Venus encountered in locating the root of - the Coda filesystem. - - 0wpage - - 44..44.. llooookkuupp - - - SSuummmmaarryy Find the ViceFid and type of an object in a directory if it - exists. - - AArrgguummeennttss - - iinn - - struct cfs_lookup_in { - ViceFid VFid; - char *name; /* Place holder for data. */ - } cfs_lookup; - - - - oouutt - - struct cfs_lookup_out { - ViceFid VFid; - int vtype; - } cfs_lookup; - - - - DDeessccrriippttiioonn This call is made to determine the ViceFid and filetype of - a directory entry. The directory entry requested carries name name - and Venus will search the directory identified by cfs_lookup_in.VFid. - The result may indicate that the name does not exist, or that - difficulty was encountered in finding it (e.g. due to disconnection). - If the result is zero, the field cfs_lookup_out.VFid contains the - targets ViceFid and cfs_lookup_out.vtype the coda_vtype giving the - type of object the name designates. 
- - The name of the object is an 8 bit character string of maximum length - CFS_MAXNAMLEN, currently set to 256 (including a 0 terminator.) - - It is extremely important to realize that Venus bitwise ors the field - cfs_lookup.vtype with CFS_NOCACHE to indicate that the object should - not be put in the kernel name cache. - - NNOOTTEE The type of the vtype is currently wrong. It should be - coda_vtype. Linux does not take note of CFS_NOCACHE. It should. - - 0wpage - - 44..55.. ggeettaattttrr - - - SSuummmmaarryy Get the attributes of a file. - - AArrgguummeennttss - - iinn - - struct cfs_getattr_in { - ViceFid VFid; - struct coda_vattr attr; /* XXXXX */ - } cfs_getattr; - - - - oouutt - - struct cfs_getattr_out { - struct coda_vattr attr; - } cfs_getattr; - - - - DDeessccrriippttiioonn This call returns the attributes of the file identified by - fid. - - EErrrroorrss Errors can occur if the object with fid does not exist, is - unaccessible or if the caller does not have permission to fetch - attributes. - - NNoottee Many kernel FS drivers (Linux, NT and Windows 95) need to acquire - the attributes as well as the Fid for the instantiation of an internal - "inode" or "FileHandle". A significant improvement in performance on - such systems could be made by combining the _l_o_o_k_u_p and _g_e_t_a_t_t_r calls - both at the Venus/kernel interaction level and at the RPC level. - - The vattr structure included in the input arguments is superfluous and - should be removed. - - 0wpage - - 44..66.. sseettaattttrr - - - SSuummmmaarryy Set the attributes of a file. - - AArrgguummeennttss - - iinn - - struct cfs_setattr_in { - ViceFid VFid; - struct coda_vattr attr; - } cfs_setattr; - - - - - oouutt - empty - - DDeessccrriippttiioonn The structure attr is filled with attributes to be changed - in BSD style. Attributes not to be changed are set to -1, apart from - vtype which is set to VNON. Other are set to the value to be assigned. 
- The only attributes which the FS driver may request to change are the - mode, owner, groupid, atime, mtime and ctime. The return value - indicates success or failure. - - EErrrroorrss A variety of errors can occur. The object may not exist, may - be inaccessible, or permission may not be granted by Venus. - - 0wpage - - 44..77.. aacccceessss - - - SSuummmmaarryy - - AArrgguummeennttss - - iinn - - struct cfs_access_in { - ViceFid VFid; - int flags; - } cfs_access; - - - - oouutt - empty - - DDeessccrriippttiioonn Verify if access to the object identified by VFid for - operations described by flags is permitted. The result indicates if - access will be granted. It is important to remember that Coda uses - ACLs to enforce protection and that ultimately the servers, not the - clients enforce the security of the system. The result of this call - will depend on whether a _t_o_k_e_n is held by the user. - - EErrrroorrss The object may not exist, or the ACL describing the protection - may not be accessible. - - 0wpage - - 44..88.. ccrreeaattee - - - SSuummmmaarryy Invoked to create a file - - AArrgguummeennttss - - iinn - - struct cfs_create_in { - ViceFid VFid; - struct coda_vattr attr; - int excl; - int mode; - char *name; /* Place holder for data. */ - } cfs_create; - - - - - oouutt - - struct cfs_create_out { - ViceFid VFid; - struct coda_vattr attr; - } cfs_create; - - - - DDeessccrriippttiioonn This upcall is invoked to request creation of a file. - The file will be created in the directory identified by VFid, its name - will be name, and the mode will be mode. If excl is set an error will - be returned if the file already exists. If the size field in attr is - set to zero the file will be truncated. The uid and gid of the file - are set by converting the CodaCred to a uid using a macro CRTOUID - (this macro is platform dependent). Upon success the VFid and - attributes of the file are returned. 
The Coda FS Driver will normally - instantiate a vnode, inode or file handle at kernel level for the new - object. - - - EErrrroorrss A variety of errors can occur. Permissions may be insufficient. - If the object exists and is not a file the error EISDIR is returned - under Unix. - - NNOOTTEE The packing of parameters is very inefficient and appears to - indicate confusion between the system call creat and the VFS operation - create. The VFS operation create is only called to create new objects. - This create call differs from the Unix one in that it is not invoked - to return a file descriptor. The truncate and exclusive options, - together with the mode, could simply be part of the mode as it is - under Unix. There should be no flags argument; this is used in open - (2) to return a file descriptor for READ or WRITE mode. - - The attributes of the directory should be returned too, since the size - and mtime changed. - - 0wpage - - 44..99.. mmkkddiirr - - - SSuummmmaarryy Create a new directory. - - AArrgguummeennttss - - iinn - - struct cfs_mkdir_in { - ViceFid VFid; - struct coda_vattr attr; - char *name; /* Place holder for data. */ - } cfs_mkdir; - - - - oouutt - - struct cfs_mkdir_out { - ViceFid VFid; - struct coda_vattr attr; - } cfs_mkdir; - - - - - DDeessccrriippttiioonn This call is similar to create but creates a directory. - Only the mode field in the input parameters is used for creation. - Upon successful creation, the attr returned contains the attributes of - the new directory. - - EErrrroorrss As for create. - - NNOOTTEE The input parameter should be changed to mode instead of - attributes. - - The attributes of the parent should be returned since the size and - mtime changes. - - 0wpage - - 44..1100.. lliinnkk - - - SSuummmmaarryy Create a link to an existing file. 
- - AArrgguummeennttss - - iinn - - struct cfs_link_in { - ViceFid sourceFid; /* cnode to link *to* */ - ViceFid destFid; /* Directory in which to place link */ - char *tname; /* Place holder for data. */ - } cfs_link; - - - - oouutt - empty - - DDeessccrriippttiioonn This call creates a link to the sourceFid in the directory - identified by destFid with name tname. The source must reside in the - target's parent, i.e. the source must be have parent destFid, i.e. Coda - does not support cross directory hard links. Only the return value is - relevant. It indicates success or the type of failure. - - EErrrroorrss The usual errors can occur.0wpage - - 44..1111.. ssyymmlliinnkk - - - SSuummmmaarryy create a symbolic link - - AArrgguummeennttss - - iinn - - struct cfs_symlink_in { - ViceFid VFid; /* Directory to put symlink in */ - char *srcname; - struct coda_vattr attr; - char *tname; - } cfs_symlink; - - - - oouutt - none - - DDeessccrriippttiioonn Create a symbolic link. The link is to be placed in the - directory identified by VFid and named tname. It should point to the - pathname srcname. The attributes of the newly created object are to - be set to attr. - - EErrrroorrss - - NNOOTTEE The attributes of the target directory should be returned since - its size changed. - - 0wpage - - 44..1122.. rreemmoovvee - - - SSuummmmaarryy Remove a file - - AArrgguummeennttss - - iinn - - struct cfs_remove_in { - ViceFid VFid; - char *name; /* Place holder for data. */ - } cfs_remove; - - - - oouutt - none - - DDeessccrriippttiioonn Remove file named cfs_remove_in.name in directory - identified by VFid. - - EErrrroorrss - - NNOOTTEE The attributes of the directory should be returned since its - mtime and size may change. - - 0wpage - - 44..1133.. rrmmddiirr - - - SSuummmmaarryy Remove a directory - - AArrgguummeennttss - - iinn - - struct cfs_rmdir_in { - ViceFid VFid; - char *name; /* Place holder for data. 
*/ - } cfs_rmdir; - - - - oouutt - none - - DDeessccrriippttiioonn Remove the directory with name name from the directory - identified by VFid. - - EErrrroorrss - - NNOOTTEE The attributes of the parent directory should be returned since - its mtime and size may change. - - 0wpage - - 44..1144.. rreeaaddlliinnkk - - - SSuummmmaarryy Read the value of a symbolic link. - - AArrgguummeennttss - - iinn - - struct cfs_readlink_in { - ViceFid VFid; - } cfs_readlink; - - - - oouutt - - struct cfs_readlink_out { - int count; - caddr_t data; /* Place holder for data. */ - } cfs_readlink; - - - - DDeessccrriippttiioonn This routine reads the contents of symbolic link - identified by VFid into the buffer data. The buffer data must be able - to hold any name up to CFS_MAXNAMLEN (PATH or NAM??). - - EErrrroorrss No unusual errors. - - 0wpage - - 44..1155.. ooppeenn - - - SSuummmmaarryy Open a file. - - AArrgguummeennttss - - iinn - - struct cfs_open_in { - ViceFid VFid; - int flags; - } cfs_open; - - - - oouutt - - struct cfs_open_out { - dev_t dev; - ino_t inode; - } cfs_open; - - - - DDeessccrriippttiioonn This request asks Venus to place the file identified by - VFid in its cache and to note that the calling process wishes to open - it with flags as in open(2). The return value to the kernel differs - for Unix and Windows systems. For Unix systems the Coda FS Driver is - informed of the device and inode number of the container file in the - fields dev and inode. For Windows the path of the container file is - returned to the kernel. - EErrrroorrss - - NNOOTTEE Currently the cfs_open_out structure is not properly adapted to - deal with the Windows case. It might be best to implement two - upcalls, one to open aiming at a container file name, the other at a - container file inode. - - 0wpage - - 44..1166.. cclloossee - - - SSuummmmaarryy Close a file, update it on the servers. 
- - AArrgguummeennttss - - iinn - - struct cfs_close_in { - ViceFid VFid; - int flags; - } cfs_close; - - - - oouutt - none - - DDeessccrriippttiioonn Close the file identified by VFid. - - EErrrroorrss - - NNOOTTEE The flags argument is bogus and not used. However, Venus' code - has room to deal with an execp input field, probably this field should - be used to inform Venus that the file was closed but is still memory - mapped for execution. There are comments about fetching versus not - fetching the data in Venus vproc_vfscalls. This seems silly. If a - file is being closed, the data in the container file is to be the new - data. Here again the execp flag might be in play to create confusion: - currently Venus might think a file can be flushed from the cache when - it is still memory mapped. This needs to be understood. - - 0wpage - - 44..1177.. iiooccttll - - - SSuummmmaarryy Do an ioctl on a file. This includes the pioctl interface. - - AArrgguummeennttss - - iinn - - struct cfs_ioctl_in { - ViceFid VFid; - int cmd; - int len; - int rwflag; - char *data; /* Place holder for data. */ - } cfs_ioctl; - - - - oouutt - - - struct cfs_ioctl_out { - int len; - caddr_t data; /* Place holder for data. */ - } cfs_ioctl; - - - - DDeessccrriippttiioonn Do an ioctl operation on a file. The command, len and - data arguments are filled as usual. flags is not used by Venus. - - EErrrroorrss - - NNOOTTEE Another bogus parameter. flags is not used. What is the - business about PREFETCHING in the Venus code? - - - 0wpage - - 44..1188.. rreennaammee - - - SSuummmmaarryy Rename a fid. - - AArrgguummeennttss - - iinn - - struct cfs_rename_in { - ViceFid sourceFid; - char *srcname; - ViceFid destFid; - char *destname; - } cfs_rename; - - - - oouutt - none - - DDeessccrriippttiioonn Rename the object with name srcname in directory - sourceFid to destname in destFid. It is important that the names - srcname and destname are 0 terminated strings. 
Strings in Unix - kernels are not always null terminated. - - EErrrroorrss - - 0wpage - - 44..1199.. rreeaaddddiirr - - - SSuummmmaarryy Read directory entries. - - AArrgguummeennttss - - iinn - - struct cfs_readdir_in { - ViceFid VFid; - int count; - int offset; - } cfs_readdir; - - - - - oouutt - - struct cfs_readdir_out { - int size; - caddr_t data; /* Place holder for data. */ - } cfs_readdir; - - - - DDeessccrriippttiioonn Read directory entries from VFid starting at offset and - read at most count bytes. Returns the data in data and returns - the size in size. - - EErrrroorrss - - NNOOTTEE This call is not used. Readdir operations exploit container - files. We will re-evaluate this during the directory revamp which is - about to take place. - - 0wpage - - 44..2200.. vvggeett - - - SSuummmmaarryy instructs Venus to do an FSDB->Get. - - AArrgguummeennttss - - iinn - - struct cfs_vget_in { - ViceFid VFid; - } cfs_vget; - - - - oouutt - - struct cfs_vget_out { - ViceFid VFid; - int vtype; - } cfs_vget; - - - - DDeessccrriippttiioonn This upcall asks Venus to do a get operation on an fsobj - labelled by VFid. - - EErrrroorrss - - NNOOTTEE This operation is not used. However, it is extremely useful - since it can be used to deal with read/write memory mapped files. - These can be "pinned" in the Venus cache using vget and released with - inactive. - - 0wpage - - 44..2211.. ffssyynncc - - - SSuummmmaarryy Tell Venus to update the RVM attributes of a file. - - AArrgguummeennttss - - iinn - - struct cfs_fsync_in { - ViceFid VFid; - } cfs_fsync; - - - - oouutt - none - - DDeessccrriippttiioonn Ask Venus to update RVM attributes of object VFid. This - should be called as part of kernel level fsync type calls. The - result indicates if the syncing was successful. - - EErrrroorrss - - NNOOTTEE Linux does not implement this call. It should. - - 0wpage - - 44..2222.. iinnaaccttiivvee - - - SSuummmmaarryy Tell Venus a vnode is no longer in use. 
- - AArrgguummeennttss - - iinn - - struct cfs_inactive_in { - ViceFid VFid; - } cfs_inactive; - - - - oouutt - none - - DDeessccrriippttiioonn This operation returns EOPNOTSUPP. - - EErrrroorrss - - NNOOTTEE This should perhaps be removed. - - 0wpage - - 44..2233.. rrddwwrr - - - SSuummmmaarryy Read or write from a file - - AArrgguummeennttss - - iinn - - struct cfs_rdwr_in { - ViceFid VFid; - int rwflag; - int count; - int offset; - int ioflag; - caddr_t data; /* Place holder for data. */ - } cfs_rdwr; - - - - - oouutt - - struct cfs_rdwr_out { - int rwflag; - int count; - caddr_t data; /* Place holder for data. */ - } cfs_rdwr; - - - - DDeessccrriippttiioonn This upcall asks Venus to read or write from a file. - - EErrrroorrss - - NNOOTTEE It should be removed since it is against the Coda philosophy that - read/write operations never reach Venus. I have been told the - operation does not work. It is not currently used. - - - 0wpage - - 44..2244.. ooddyymmoouunntt - - - SSuummmmaarryy Allows mounting multiple Coda "filesystems" on one Unix mount - point. - - AArrgguummeennttss - - iinn - - struct ody_mount_in { - char *name; /* Place holder for data. */ - } ody_mount; - - - - oouutt - - struct ody_mount_out { - ViceFid VFid; - } ody_mount; - - - - DDeessccrriippttiioonn Asks Venus to return the rootfid of a Coda system named - name. The fid is returned in VFid. - - EErrrroorrss - - NNOOTTEE This call was used by David for dynamic sets. It should be - removed since it causes a jungle of pointers in the VFS mounting area. - It is not used by Coda proper. Call is not implemented by Venus. - - 0wpage - - 44..2255.. ooddyy__llooookkuupp - - - SSuummmmaarryy Looks up something. - - AArrgguummeennttss - - iinn irrelevant - - - oouutt - irrelevant - - DDeessccrriippttiioonn - - EErrrroorrss - - NNOOTTEE Gut it. Call is not implemented by Venus. - - 0wpage - - 44..2266.. ooddyy__eexxppaanndd - - - SSuummmmaarryy expands something in a dynamic set. 
- - AArrgguummeennttss - - iinn irrelevant - - oouutt - irrelevant - - DDeessccrriippttiioonn - - EErrrroorrss - - NNOOTTEE Gut it. Call is not implemented by Venus. - - 0wpage - - 44..2277.. pprreeffeettcchh - - - SSuummmmaarryy Prefetch a dynamic set. - - AArrgguummeennttss - - iinn Not documented. - - oouutt - Not documented. - - DDeessccrriippttiioonn Venus worker.cc has support for this call, although it is - noted that it doesn't work. Not surprising, since the kernel does not - have support for it. (ODY_PREFETCH is not a defined operation). - - EErrrroorrss - - NNOOTTEE Gut it. It isn't working and isn't used by Coda. - - - 0wpage - - 44..2288.. ssiiggnnaall - - - SSuummmmaarryy Send Venus a signal about an upcall. - - AArrgguummeennttss - - iinn none - - oouutt - not applicable. - - DDeessccrriippttiioonn This is an out-of-band upcall to Venus to inform Venus - that the calling process received a signal after Venus read the - message from the input queue. Venus is supposed to clean up the - operation. - - EErrrroorrss No reply is given. - - NNOOTTEE We need to better understand what Venus needs to clean up and if - it is doing this correctly. Also we need to handle multiple upcall - per system call situations correctly. It would be important to know - what state changes in Venus take place after an upcall for which the - kernel is responsible for notifying Venus to clean up (e.g. open - definitely is such a state change, but many others are maybe not). - - 0wpage - - 55.. TThhee mmiinniiccaacchhee aanndd ddoowwnnccaallllss - - - The Coda FS Driver can cache results of lookup and access upcalls, to - limit the frequency of upcalls. Upcalls carry a price since a process - context switch needs to take place. The counterpart of caching the - information is that Venus will notify the FS Driver that cached - entries must be flushed or renamed. 
- - The kernel code generally has to maintain a structure which links the - internal file handles (called vnodes in BSD, inodes in Linux and - FileHandles in Windows) with the ViceFid's which Venus maintains. The - reason is that frequent translations back and forth are needed in - order to make upcalls and use the results of upcalls. Such linking - objects are called ccnnooddeess. - - The current minicache implementations have cache entries which record - the following: - - 1. the name of the file - - 2. the cnode of the directory containing the object - - 3. a list of CodaCred's for which the lookup is permitted. - - 4. the cnode of the object - - The lookup call in the Coda FS Driver may request the cnode of the - desired object from the cache, by passing its name, directory and the - CodaCred's of the caller. The cache will return the cnode or indicate - that it cannot be found. The Coda FS Driver must be careful to - invalidate cache entries when it modifies or removes objects. - - When Venus obtains information that indicates that cache entries are - no longer valid, it will make a downcall to the kernel. Downcalls are - intercepted by the Coda FS Driver and lead to cache invalidations of - the kind described below. The Coda FS Driver does not return an error - unless the downcall data could not be read into kernel memory. - - - 55..11.. IINNVVAALLIIDDAATTEE - - - No information is available on this call. - - - 55..22.. FFLLUUSSHH - - - - AArrgguummeennttss None - - SSuummmmaarryy Flush the name cache entirely. - - DDeessccrriippttiioonn Venus issues this call upon startup and when it dies. This - is to prevent stale cache information being held. Some operating - systems allow the kernel name cache to be switched off dynamically. - When this is done, this downcall is made. - - - 55..33.. 
PPUURRGGEEUUSSEERR - - - AArrgguummeennttss - - struct cfs_purgeuser_out {/* CFS_PURGEUSER is a venus->kernel call */ - struct CodaCred cred; - } cfs_purgeuser; - - - - DDeessccrriippttiioonn Remove all entries in the cache carrying the Cred. This - call is issued when tokens for a user expire or are flushed. - - - 55..44.. ZZAAPPFFIILLEE - - - AArrgguummeennttss - - struct cfs_zapfile_out { /* CFS_ZAPFILE is a venus->kernel call */ - ViceFid CodaFid; - } cfs_zapfile; - - - - DDeessccrriippttiioonn Remove all entries which have the (dir vnode, name) pair. - This is issued as a result of an invalidation of cached attributes of - a vnode. - - NNOOTTEE Call is not named correctly in NetBSD and Mach. The minicache - zapfile routine takes different arguments. Linux does not implement - the invalidation of attributes correctly. - - - - 55..55.. ZZAAPPDDIIRR - - - AArrgguummeennttss - - struct cfs_zapdir_out { /* CFS_ZAPDIR is a venus->kernel call */ - ViceFid CodaFid; - } cfs_zapdir; - - - - DDeessccrriippttiioonn Remove all entries in the cache lying in a directory - CodaFid, and all children of this directory. This call is issued when - Venus receives a callback on the directory. - - - 55..66.. ZZAAPPVVNNOODDEE - - - - AArrgguummeennttss - - struct cfs_zapvnode_out { /* CFS_ZAPVNODE is a venus->kernel call */ - struct CodaCred cred; - ViceFid VFid; - } cfs_zapvnode; - - - - DDeessccrriippttiioonn Remove all entries in the cache carrying the cred and VFid - as in the arguments. This downcall is probably never issued. - - - 55..77.. PPUURRGGEEFFIIDD - - - SSuummmmaarryy - - AArrgguummeennttss - - struct cfs_purgefid_out { /* CFS_PURGEFID is a venus->kernel call */ - ViceFid CodaFid; - } cfs_purgefid; - - - - DDeessccrriippttiioonn Flush the attribute for the file. If it is a dir (odd - vnode), purge its children from the namecache and remove the file from the - namecache. - - - - 55..88.. RREEPPLLAACCEE - - - SSuummmmaarryy Replace the Fid's for a collection of names. 
- - AArrgguummeennttss - - struct cfs_replace_out { /* cfs_replace is a venus->kernel call */ - ViceFid NewFid; - ViceFid OldFid; - } cfs_replace; - - - - DDeessccrriippttiioonn This routine replaces a ViceFid in the name cache with - another. It is added to allow Venus during reintegration to replace - locally allocated temp fids while disconnected with global fids even - when the reference counts on those fids are not zero. - - 0wpage - - 66.. IInniittiiaalliizzaattiioonn aanndd cclleeaannuupp - - - This section gives brief hints as to desirable features for the Coda - FS Driver at startup and upon shutdown or Venus failures. Before - entering the discussion it is useful to repeat that the Coda FS Driver - maintains the following data: - - - 1. message queues - - 2. cnodes - - 3. name cache entries - - The name cache entries are entirely private to the driver, so they - can easily be manipulated. The message queues will generally have - clear points of initialization and destruction. The cnodes are - much more delicate. User processes hold reference counts in Coda - filesystems and it can be difficult to clean up the cnodes. - - It can expect requests through: - - 1. the message subsystem - - 2. the VFS layer - - 3. pioctl interface - - Currently the _p_i_o_c_t_l passes through the VFS for Coda so we can - treat these similarly. - - - 66..11.. RReeqquuiirreemmeennttss - - - The following requirements should be accommodated: - - 1. The message queues should have open and close routines. On Unix - the opening of the character devices are such routines. - - +o Before opening, no messages can be placed. - - +o Opening will remove any old messages still pending. - - +o Close will notify any sleeping processes that their upcall cannot - be completed. - - +o Close will free all memory allocated by the message queues. - - - 2. At open the namecache shall be initialized to empty state. - - 3. Before the message queues are open, all VFS operations will fail. 
- Fortunately this can be achieved by making sure than mounting the - Coda filesystem cannot succeed before opening. - - 4. After closing of the queues, no VFS operations can succeed. Here - one needs to be careful, since a few operations (lookup, - read/write, readdir) can proceed without upcalls. These must be - explicitly blocked. - - 5. Upon closing the namecache shall be flushed and disabled. - - 6. All memory held by cnodes can be freed without relying on upcalls. - - 7. Unmounting the file system can be done without relying on upcalls. - - 8. Mounting the Coda filesystem should fail gracefully if Venus cannot - get the rootfid or the attributes of the rootfid. The latter is - best implemented by Venus fetching these objects before attempting - to mount. - - NNOOTTEE NetBSD in particular but also Linux have not implemented the - above requirements fully. For smooth operation this needs to be - corrected. - - - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 373d695bac65..ec6e308587e1 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -63,6 +63,7 @@ Documentation for filesystem implementations. btrfs cifs/cifsroot ceph + coda cramfs debugfs dlmfs diff --git a/MAINTAINERS b/MAINTAINERS index a103117dec85..7d2fda93f838 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4203,7 +4203,7 @@ M: coda@cs.cmu.edu L: codalist@coda.cs.cmu.edu S: Maintained W: http://www.coda.cs.cmu.edu/ -F: Documentation/filesystems/coda.txt +F: Documentation/filesystems/coda.rst F: fs/coda/ F: include/linux/coda*.h F: include/uapi/linux/coda*.h diff --git a/fs/coda/Kconfig b/fs/coda/Kconfig index ae6759f9594a..c3477eeafb3f 100644 --- a/fs/coda/Kconfig +++ b/fs/coda/Kconfig @@ -15,7 +15,7 @@ config CODA_FS *client*. You will need user level code as well, both for the client and server. Servers are currently user level, i.e. they need no kernel support. 
Please read - and check out the Coda + and check out the Coda home page . To compile the coda client support as a module, choose M here: the -- cgit v1.2.3 From 01478b833176761e188a465fc7a22bd4e07d040e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:02 +0200 Subject: docs: filesystems: convert devpts.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/4ac8f3a7edd4d817acf0d173ead7ef74fe010c6c.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 2 +- Documentation/filesystems/devpts.rst | 36 +++++++++++++++++++++++++++++ Documentation/filesystems/devpts.txt | 26 --------------------- Documentation/filesystems/index.rst | 1 + 4 files changed, 38 insertions(+), 27 deletions(-) create mode 100644 Documentation/filesystems/devpts.rst delete mode 100644 Documentation/filesystems/devpts.txt diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 82bfd5892663..706c2158ad25 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -895,7 +895,7 @@ this sysctl interface anymore. pty === -See Documentation/filesystems/devpts.txt. +See Documentation/filesystems/devpts.rst. randomize_va_space diff --git a/Documentation/filesystems/devpts.rst b/Documentation/filesystems/devpts.rst new file mode 100644 index 000000000000..a03248ddfb4c --- /dev/null +++ b/Documentation/filesystems/devpts.rst @@ -0,0 +1,36 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +The Devpts Filesystem +===================== + +Each mount of the devpts filesystem is now distinct such that ptys +and their indicies allocated in one mount are independent from ptys +and their indicies in all other mounts. 
+ +All mounts of the devpts filesystem now create a ``/dev/pts/ptmx`` node +with permissions ``0000``. + +To retain backwards compatibility the a ptmx device node (aka any node +created with ``mknod name c 5 2``) when opened will look for an instance +of devpts under the name ``pts`` in the same directory as the ptmx device +node. + +As an option instead of placing a ``/dev/ptmx`` device node at ``/dev/ptmx`` +it is possible to place a symlink to ``/dev/pts/ptmx`` at ``/dev/ptmx`` or +to bind mount ``/dev/ptx/ptmx`` to ``/dev/ptmx``. If you opt for using +the devpts filesystem in this manner devpts should be mounted with +the ``ptmxmode=0666``, or ``chmod 0666 /dev/pts/ptmx`` should be called. + +Total count of pty pairs in all instances is limited by sysctls:: + + kernel.pty.max = 4096 - global limit + kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace + kernel.pty.nr - current count of ptys + +Per-instance limit could be set by adding mount option ``max=``. + +This feature was added in kernel 3.4 together with +``sysctl kernel.pty.reserve``. + +In kernels older than 3.4 sysctl ``kernel.pty.max`` works as per-instance limit. diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt deleted file mode 100644 index 9f94fe276dea..000000000000 --- a/Documentation/filesystems/devpts.txt +++ /dev/null @@ -1,26 +0,0 @@ -Each mount of the devpts filesystem is now distinct such that ptys -and their indicies allocated in one mount are independent from ptys -and their indicies in all other mounts. - -All mounts of the devpts filesystem now create a /dev/pts/ptmx node -with permissions 0000. - -To retain backwards compatibility the a ptmx device node (aka any node -created with "mknod name c 5 2") when opened will look for an instance -of devpts under the name "pts" in the same directory as the ptmx device -node. 
- -As an option instead of placing a /dev/ptmx device node at /dev/ptmx -it is possible to place a symlink to /dev/pts/ptmx at /dev/ptmx or -to bind mount /dev/ptx/ptmx to /dev/ptmx. If you opt for using -the devpts filesystem in this manner devpts should be mounted with -the ptmxmode=0666, or chmod 0666 /dev/pts/ptmx should be called. - -Total count of pty pairs in all instances is limited by sysctls: -kernel.pty.max = 4096 - global limit -kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace -kernel.pty.nr - current count of ptys - -Per-instance limit could be set by adding mount option "max=". -This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve. -In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index ec6e308587e1..43717f11d10b 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -24,6 +24,7 @@ algorithms work. 
splice locking directory-locking + devpts automount-support -- cgit v1.2.3 From b31763cff488344aaabbee99739774aea1636e27 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:03 +0200 Subject: docs: filesystems: convert dnotify.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Add table markups; - Add it to filesystems/index.rst Signed-off-by: Mauro Carvalho Chehab Acked-by: Jan Kara Link: https://lore.kernel.org/r/b39d6430d1c28438e833f01cb4597eff78703c75.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/dnotify.rst | 75 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/dnotify.txt | 70 -------------------------------- Documentation/filesystems/index.rst | 1 + MAINTAINERS | 2 +- 4 files changed, 77 insertions(+), 71 deletions(-) create mode 100644 Documentation/filesystems/dnotify.rst delete mode 100644 Documentation/filesystems/dnotify.txt diff --git a/Documentation/filesystems/dnotify.rst b/Documentation/filesystems/dnotify.rst new file mode 100644 index 000000000000..a28a1f9ef79c --- /dev/null +++ b/Documentation/filesystems/dnotify.rst @@ -0,0 +1,75 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +Linux Directory Notification +============================ + + Stephen Rothwell + +The intention of directory notification is to allow user applications +to be notified when a directory, or any of the files in it, are changed. +The basic mechanism involves the application registering for notification +on a directory using a fcntl(2) call and the notifications themselves +being delivered using signals. + +The application decides which "events" it wants to be notified about. 
+The currently defined events are: + + ========= ===================================================== + DN_ACCESS A file in the directory was accessed (read) + DN_MODIFY A file in the directory was modified (write,truncate) + DN_CREATE A file was created in the directory + DN_DELETE A file was unlinked from directory + DN_RENAME A file in the directory was renamed + DN_ATTRIB A file in the directory had its attributes + changed (chmod,chown) + ========= ===================================================== + +Usually, the application must reregister after each notification, but +if DN_MULTISHOT is or'ed with the event mask, then the registration will +remain until explicitly removed (by registering for no events). + +By default, SIGIO will be delivered to the process and no other useful +information. However, if the F_SETSIG fcntl(2) call is used to let the +kernel know which signal to deliver, a siginfo structure will be passed to +the signal handler and the si_fd member of that structure will contain the +file descriptor associated with the directory in which the event occurred. + +Preferably the application will choose one of the real time signals +(SIGRTMIN + ) so that the notifications may be queued. This is +especially important if DN_MULTISHOT is specified. Note that SIGRTMIN +is often blocked, so it is better to use (at least) SIGRTMIN + 1. + +Implementation expectations (features and bugs :-)) +--------------------------------------------------- + +The notification should work for any local access to files even if the +actual file system is on a remote server. This implies that remote +access to files served by local user mode servers should be notified. +Also, remote accesses to files served by a local kernel NFS server should +be notified. + +In order to make the impact on the file system code as small as possible, +the problem of hard links to files has been ignored. 
So if a file (x) +exists in two directories (a and b) then a change to the file using the +name "a/x" should be notified to a program expecting notifications on +directory "a", but will not be notified to one expecting notifications on +directory "b". + +Also, files that are unlinked, will still cause notifications in the +last directory that they were linked to. + +Configuration +------------- + +Dnotify is controlled via the CONFIG_DNOTIFY configuration option. When +disabled, fcntl(fd, F_NOTIFY, ...) will return -EINVAL. + +Example +------- +See tools/testing/selftests/filesystems/dnotify_test.c for an example. + +NOTE +---- +Beginning with Linux 2.6.13, dnotify has been replaced by inotify. +See Documentation/filesystems/inotify.rst for more information on it. diff --git a/Documentation/filesystems/dnotify.txt b/Documentation/filesystems/dnotify.txt deleted file mode 100644 index 08d575ece45d..000000000000 --- a/Documentation/filesystems/dnotify.txt +++ /dev/null @@ -1,70 +0,0 @@ - Linux Directory Notification - ============================ - - Stephen Rothwell - -The intention of directory notification is to allow user applications -to be notified when a directory, or any of the files in it, are changed. -The basic mechanism involves the application registering for notification -on a directory using a fcntl(2) call and the notifications themselves -being delivered using signals. - -The application decides which "events" it wants to be notified about. 
-The currently defined events are: - - DN_ACCESS A file in the directory was accessed (read) - DN_MODIFY A file in the directory was modified (write,truncate) - DN_CREATE A file was created in the directory - DN_DELETE A file was unlinked from directory - DN_RENAME A file in the directory was renamed - DN_ATTRIB A file in the directory had its attributes - changed (chmod,chown) - -Usually, the application must reregister after each notification, but -if DN_MULTISHOT is or'ed with the event mask, then the registration will -remain until explicitly removed (by registering for no events). - -By default, SIGIO will be delivered to the process and no other useful -information. However, if the F_SETSIG fcntl(2) call is used to let the -kernel know which signal to deliver, a siginfo structure will be passed to -the signal handler and the si_fd member of that structure will contain the -file descriptor associated with the directory in which the event occurred. - -Preferably the application will choose one of the real time signals -(SIGRTMIN + ) so that the notifications may be queued. This is -especially important if DN_MULTISHOT is specified. Note that SIGRTMIN -is often blocked, so it is better to use (at least) SIGRTMIN + 1. - -Implementation expectations (features and bugs :-)) ---------------------------- - -The notification should work for any local access to files even if the -actual file system is on a remote server. This implies that remote -access to files served by local user mode servers should be notified. -Also, remote accesses to files served by a local kernel NFS server should -be notified. - -In order to make the impact on the file system code as small as possible, -the problem of hard links to files has been ignored. 
So if a file (x) -exists in two directories (a and b) then a change to the file using the -name "a/x" should be notified to a program expecting notifications on -directory "a", but will not be notified to one expecting notifications on -directory "b". - -Also, files that are unlinked, will still cause notifications in the -last directory that they were linked to. - -Configuration -------------- - -Dnotify is controlled via the CONFIG_DNOTIFY configuration option. When -disabled, fcntl(fd, F_NOTIFY, ...) will return -EINVAL. - -Example -------- -See tools/testing/selftests/filesystems/dnotify_test.c for an example. - -NOTE ----- -Beginning with Linux 2.6.13, dnotify has been replaced by inotify. -See Documentation/filesystems/inotify.rst for more information on it. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 43717f11d10b..815101241115 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -25,6 +25,7 @@ algorithms work. locking directory-locking devpts + dnotify automount-support diff --git a/MAINTAINERS b/MAINTAINERS index 7d2fda93f838..46ba68a07a8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4996,7 +4996,7 @@ M: Jan Kara R: Amir Goldstein L: linux-fsdevel@vger.kernel.org S: Maintained -F: Documentation/filesystems/dnotify.txt +F: Documentation/filesystems/dnotify.rst F: fs/notify/dnotify/ F: include/linux/dnotify.h -- cgit v1.2.3 From e6f7df74ec1ab956430db2dcdcaf217ae5cf4ae0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:04 +0200 Subject: docs: filesystems: convert fiemap.txt to ReST - Add a SPDX header; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9182d49ffca7a0580e32ab24ecf5f8cc8d8924af.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/fiemap.rst | 234 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/fiemap.txt | 231 ---------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 235 insertions(+), 231 deletions(-) create mode 100644 Documentation/filesystems/fiemap.rst delete mode 100644 Documentation/filesystems/fiemap.txt diff --git a/Documentation/filesystems/fiemap.rst b/Documentation/filesystems/fiemap.rst new file mode 100644 index 000000000000..2a572e7edc08 --- /dev/null +++ b/Documentation/filesystems/fiemap.rst @@ -0,0 +1,234 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +Fiemap Ioctl +============ + +The fiemap ioctl is an efficient method for userspace to get file +extent mappings. Instead of block-by-block mapping (such as bmap), fiemap +returns a list of extents. + + +Request Basics +-------------- + +A fiemap request is encoded within struct fiemap:: + + struct fiemap { + __u64 fm_start; /* logical offset (inclusive) at + * which to start mapping (in) */ + __u64 fm_length; /* logical length of mapping which + * userspace cares about (in) */ + __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ + __u32 fm_mapped_extents; /* number of extents that were + * mapped (out) */ + __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ + }; + + +fm_start, and fm_length specify the logical range within the file +which the process would like mappings for. Extents returned mirror +those on disk - that is, the logical offset of the 1st returned extent +may start before fm_start, and the range covered by the last returned +extent may end after fm_length. All offsets and lengths are in bytes. 
+ +Certain flags to modify the way in which mappings are looked up can be +set in fm_flags. If the kernel doesn't understand some particular +flags, it will return EBADR and the contents of fm_flags will contain +the set of flags which caused the error. If the kernel is compatible +with all flags passed, the contents of fm_flags will be unmodified. +It is up to userspace to determine whether rejection of a particular +flag is fatal to its operation. This scheme is intended to allow the +fiemap interface to grow in the future but without losing +compatibility with old software. + +fm_extent_count specifies the number of elements in the fm_extents[] array +that can be used to return extents. If fm_extent_count is zero, then the +fm_extents[] array is ignored (no extents will be returned), and the +fm_mapped_extents count will hold the number of extents needed in +fm_extents[] to hold the file's current mapping. Note that there is +nothing to prevent the file from changing between calls to FIEMAP. + +The following flags can be set in fm_flags: + +FIEMAP_FLAG_SYNC + If this flag is set, the kernel will sync the file before mapping extents. + +FIEMAP_FLAG_XATTR + If this flag is set, the extents returned will describe the inodes + extended attribute lookup tree, instead of its data tree. + + +Extent Mapping +-------------- + +Extent information is returned within the embedded fm_extents array +which userspace must allocate along with the fiemap structure. The +number of elements in the fiemap_extents[] array should be passed via +fm_extent_count. The number of extents mapped by kernel will be +returned via fm_mapped_extents. If the number of fiemap_extents +allocated is less than would be required to map the requested range, +the maximum number of extents that can be mapped in the fm_extent[] +array will be returned and fm_mapped_extents will be equal to +fm_extent_count. 
In that case, the last extent in the array will not +complete the requested range and will not have the FIEMAP_EXTENT_LAST +flag set (see the next section on extent flags). + +Each extent is described by a single fiemap_extent structure as +returned in fm_extents:: + + struct fiemap_extent { + __u64 fe_logical; /* logical offset in bytes for the start of + * the extent */ + __u64 fe_physical; /* physical offset in bytes for the start + * of the extent */ + __u64 fe_length; /* length in bytes for the extent */ + __u64 fe_reserved64[2]; + __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + __u32 fe_reserved[3]; + }; + +All offsets and lengths are in bytes and mirror those on disk. It is valid +for an extents logical offset to start before the request or its logical +length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is +returned, fe_logical, fe_physical, and fe_length will be aligned to the +block size of the file system. With the exception of extents flagged as +FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. + +The fe_flags field contains flags which describe the extent returned. +A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in +the file so that the process making fiemap calls can determine when no +more extents are available, without having to call the ioctl again. + +Some flags are intentionally vague and will always be set in the +presence of other more specific flags. This way a program looking for +a general property does not have to know all existing and future flags +which imply that property. + +For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL +are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking +for inline or tail-packed data can key on the specific flag. 
Software +which simply cares not to try operating on non-aligned extents +however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to +worry about all present and future flags which might imply unaligned +data. Note that the opposite is not true - it would be valid for +FIEMAP_EXTENT_NOT_ALIGNED to appear alone. + +FIEMAP_EXTENT_LAST + This is generally the last extent in the file. A mapping attempt past + this extent may return nothing. Some implementations set this flag to + indicate this extent is the last one in the range queried by the user + (via fiemap->fm_length). + +FIEMAP_EXTENT_UNKNOWN + The location of this extent is currently unknown. This may indicate + the data is stored on an inaccessible volume or that no storage has + been allocated for the file yet. + +FIEMAP_EXTENT_DELALLOC + This will also set FIEMAP_EXTENT_UNKNOWN. + + Delayed allocation - while there is data for this extent, its + physical location has not been allocated yet. + +FIEMAP_EXTENT_ENCODED + This extent does not consist of plain filesystem blocks but is + encoded (e.g. encrypted or compressed). Reading the data in this + extent via I/O to the block device will have undefined results. + +Note that it is *always* undefined to try to update the data +in-place by writing to the indicated location without the +assistance of the filesystem, or to access the data using the +information returned by the FIEMAP interface while the filesystem +is mounted. In other words, user applications may only read the +extent data via I/O to the block device while the filesystem is +unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is +clear; user applications must not try reading or writing to the +filesystem via the block device under any other circumstances. + +FIEMAP_EXTENT_DATA_ENCRYPTED + This will also set FIEMAP_EXTENT_ENCODED + The data in this extent has been encrypted by the file system. 
+ +FIEMAP_EXTENT_NOT_ALIGNED + Extent offsets and length are not guaranteed to be block aligned. + +FIEMAP_EXTENT_DATA_INLINE + This will also set FIEMAP_EXTENT_NOT_ALIGNED + Data is located within a meta data block. + +FIEMAP_EXTENT_DATA_TAIL + This will also set FIEMAP_EXTENT_NOT_ALIGNED + Data is packed into a block with data from other files. + +FIEMAP_EXTENT_UNWRITTEN + Unwritten extent - the extent is allocated but its data has not been + initialized. This indicates the extent's data will be all zero if read + through the filesystem but the contents are undefined if read directly from + the device. + +FIEMAP_EXTENT_MERGED + This will be set when a file does not support extents, i.e., it uses a block + based addressing scheme. Since returning an extent for each block back to + userspace would be highly inefficient, the kernel will try to merge most + adjacent blocks into 'extents'. + + +VFS -> File System Implementation +--------------------------------- + +File systems wishing to support fiemap must implement a ->fiemap callback on +their inode_operations structure. The fs ->fiemap call is responsible for +defining its set of supported fiemap flags, and calling a helper function on +each discovered extent:: + + struct inode_operations { + ... + + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); + +->fiemap is passed struct fiemap_extent_info which describes the +fiemap request:: + + struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ + }; + +It is intended that the file system should not need to access any of this +structure directly. Filesystem handlers should be tolerant to signals and return +EINTR once fatal signal received. 
+ + +Flag checking should be done at the beginning of the ->fiemap callback via the +fiemap_check_flags() helper:: + + int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + +The struct fieinfo should be passed in as received from ioctl_fiemap(). The +set of fiemap flags which the fs understands should be passed via fs_flags. If +fiemap_check_flags finds invalid user flags, it will place the bad values in +fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from +fiemap_check_flags(), it should immediately exit, returning that error back to +ioctl_fiemap(). + + +For each extent in the request range, the file system should call +the helper function, fiemap_fill_next_extent():: + + int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags, u32 dev); + +fiemap_fill_next_extent() will use the passed values to populate the +next free extent in the fm_extents array. 'General' extent flags will +automatically be set from specific flags on behalf of the calling file +system so that the userspace API is not broken. + +fiemap_fill_next_extent() returns 0 on success, and 1 when the +user-supplied fm_extents array is full. If an error is encountered +while copying the extent to user memory, -EFAULT will be returned. diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt deleted file mode 100644 index ac87e6fda842..000000000000 --- a/Documentation/filesystems/fiemap.txt +++ /dev/null @@ -1,231 +0,0 @@ -============ -Fiemap Ioctl -============ - -The fiemap ioctl is an efficient method for userspace to get file -extent mappings. Instead of block-by-block mapping (such as bmap), fiemap -returns a list of extents. 
- - -Request Basics --------------- - -A fiemap request is encoded within struct fiemap: - -struct fiemap { - __u64 fm_start; /* logical offset (inclusive) at - * which to start mapping (in) */ - __u64 fm_length; /* logical length of mapping which - * userspace cares about (in) */ - __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ - __u32 fm_mapped_extents; /* number of extents that were - * mapped (out) */ - __u32 fm_extent_count; /* size of fm_extents array (in) */ - __u32 fm_reserved; - struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ -}; - - -fm_start, and fm_length specify the logical range within the file -which the process would like mappings for. Extents returned mirror -those on disk - that is, the logical offset of the 1st returned extent -may start before fm_start, and the range covered by the last returned -extent may end after fm_length. All offsets and lengths are in bytes. - -Certain flags to modify the way in which mappings are looked up can be -set in fm_flags. If the kernel doesn't understand some particular -flags, it will return EBADR and the contents of fm_flags will contain -the set of flags which caused the error. If the kernel is compatible -with all flags passed, the contents of fm_flags will be unmodified. -It is up to userspace to determine whether rejection of a particular -flag is fatal to its operation. This scheme is intended to allow the -fiemap interface to grow in the future but without losing -compatibility with old software. - -fm_extent_count specifies the number of elements in the fm_extents[] array -that can be used to return extents. If fm_extent_count is zero, then the -fm_extents[] array is ignored (no extents will be returned), and the -fm_mapped_extents count will hold the number of extents needed in -fm_extents[] to hold the file's current mapping. Note that there is -nothing to prevent the file from changing between calls to FIEMAP. 
- -The following flags can be set in fm_flags: - -* FIEMAP_FLAG_SYNC -If this flag is set, the kernel will sync the file before mapping extents. - -* FIEMAP_FLAG_XATTR -If this flag is set, the extents returned will describe the inodes -extended attribute lookup tree, instead of its data tree. - - -Extent Mapping --------------- - -Extent information is returned within the embedded fm_extents array -which userspace must allocate along with the fiemap structure. The -number of elements in the fiemap_extents[] array should be passed via -fm_extent_count. The number of extents mapped by kernel will be -returned via fm_mapped_extents. If the number of fiemap_extents -allocated is less than would be required to map the requested range, -the maximum number of extents that can be mapped in the fm_extent[] -array will be returned and fm_mapped_extents will be equal to -fm_extent_count. In that case, the last extent in the array will not -complete the requested range and will not have the FIEMAP_EXTENT_LAST -flag set (see the next section on extent flags). - -Each extent is described by a single fiemap_extent structure as -returned in fm_extents. - -struct fiemap_extent { - __u64 fe_logical; /* logical offset in bytes for the start of - * the extent */ - __u64 fe_physical; /* physical offset in bytes for the start - * of the extent */ - __u64 fe_length; /* length in bytes for the extent */ - __u64 fe_reserved64[2]; - __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ - __u32 fe_reserved[3]; -}; - -All offsets and lengths are in bytes and mirror those on disk. It is valid -for an extents logical offset to start before the request or its logical -length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is -returned, fe_logical, fe_physical, and fe_length will be aligned to the -block size of the file system. With the exception of extents flagged as -FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. 
- -The fe_flags field contains flags which describe the extent returned. -A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in -the file so that the process making fiemap calls can determine when no -more extents are available, without having to call the ioctl again. - -Some flags are intentionally vague and will always be set in the -presence of other more specific flags. This way a program looking for -a general property does not have to know all existing and future flags -which imply that property. - -For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL -are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking -for inline or tail-packed data can key on the specific flag. Software -which simply cares not to try operating on non-aligned extents -however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to -worry about all present and future flags which might imply unaligned -data. Note that the opposite is not true - it would be valid for -FIEMAP_EXTENT_NOT_ALIGNED to appear alone. - -* FIEMAP_EXTENT_LAST -This is generally the last extent in the file. A mapping attempt past -this extent may return nothing. Some implementations set this flag to -indicate this extent is the last one in the range queried by the user -(via fiemap->fm_length). - -* FIEMAP_EXTENT_UNKNOWN -The location of this extent is currently unknown. This may indicate -the data is stored on an inaccessible volume or that no storage has -been allocated for the file yet. - -* FIEMAP_EXTENT_DELALLOC - - This will also set FIEMAP_EXTENT_UNKNOWN. -Delayed allocation - while there is data for this extent, its -physical location has not been allocated yet. - -* FIEMAP_EXTENT_ENCODED -This extent does not consist of plain filesystem blocks but is -encoded (e.g. encrypted or compressed). Reading the data in this -extent via I/O to the block device will have undefined results. 
- -Note that it is *always* undefined to try to update the data -in-place by writing to the indicated location without the -assistance of the filesystem, or to access the data using the -information returned by the FIEMAP interface while the filesystem -is mounted. In other words, user applications may only read the -extent data via I/O to the block device while the filesystem is -unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is -clear; user applications must not try reading or writing to the -filesystem via the block device under any other circumstances. - -* FIEMAP_EXTENT_DATA_ENCRYPTED - - This will also set FIEMAP_EXTENT_ENCODED -The data in this extent has been encrypted by the file system. - -* FIEMAP_EXTENT_NOT_ALIGNED -Extent offsets and length are not guaranteed to be block aligned. - -* FIEMAP_EXTENT_DATA_INLINE - This will also set FIEMAP_EXTENT_NOT_ALIGNED -Data is located within a meta data block. - -* FIEMAP_EXTENT_DATA_TAIL - This will also set FIEMAP_EXTENT_NOT_ALIGNED -Data is packed into a block with data from other files. - -* FIEMAP_EXTENT_UNWRITTEN -Unwritten extent - the extent is allocated but its data has not been -initialized. This indicates the extent's data will be all zero if read -through the filesystem but the contents are undefined if read directly from -the device. - -* FIEMAP_EXTENT_MERGED -This will be set when a file does not support extents, i.e., it uses a block -based addressing scheme. Since returning an extent for each block back to -userspace would be highly inefficient, the kernel will try to merge most -adjacent blocks into 'extents'. - - -VFS -> File System Implementation ---------------------------------- - -File systems wishing to support fiemap must implement a ->fiemap callback on -their inode_operations structure. The fs ->fiemap call is responsible for -defining its set of supported fiemap flags, and calling a helper function on -each discovered extent: - -struct inode_operations { - ... 
- - int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, - u64 len); - -->fiemap is passed struct fiemap_extent_info which describes the -fiemap request: - -struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ -}; - -It is intended that the file system should not need to access any of this -structure directly. Filesystem handlers should be tolerant to signals and return -EINTR once fatal signal received. - - -Flag checking should be done at the beginning of the ->fiemap callback via the -fiemap_check_flags() helper: - -int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); - -The struct fieinfo should be passed in as received from ioctl_fiemap(). The -set of fiemap flags which the fs understands should be passed via fs_flags. If -fiemap_check_flags finds invalid user flags, it will place the bad values in -fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from -fiemap_check_flags(), it should immediately exit, returning that error back to -ioctl_fiemap(). - - -For each extent in the request range, the file system should call -the helper function, fiemap_fill_next_extent(): - -int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, - u64 phys, u64 len, u32 flags, u32 dev); - -fiemap_fill_next_extent() will use the passed values to populate the -next free extent in the fm_extents array. 'General' extent flags will -automatically be set from specific flags on behalf of the calling file -system so that the userspace API is not broken. - -fiemap_fill_next_extent() returns 0 on success, and 1 when the -user-supplied fm_extents array is full. If an error is encountered -while copying the extent to user memory, -EFAULT will be returned. 
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 815101241115..99b450260e4d 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -26,6 +26,7 @@ algorithms work. directory-locking devpts dnotify + fiemap automount-support -- cgit v1.2.3 From e6d42cb19c0367ae6b0edfc09daceaefded4cbdd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:05 +0200 Subject: docs: filesystems: convert files.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/e31b0f6a7ee466a233dc7f9c73f53f07ebb07f0b.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/files.rst | 128 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/files.txt | 123 ---------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 129 insertions(+), 123 deletions(-) create mode 100644 Documentation/filesystems/files.rst delete mode 100644 Documentation/filesystems/files.txt diff --git a/Documentation/filesystems/files.rst b/Documentation/filesystems/files.rst new file mode 100644 index 000000000000..cbf8e57376bf --- /dev/null +++ b/Documentation/filesystems/files.rst @@ -0,0 +1,128 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================================== +File management in the Linux kernel +=================================== + +This document describes how locking for files (struct file) +and file descriptor table (struct files) works. + +Up until 2.6.12, the file descriptor table has been protected +with a lock (files->file_lock) and reference count (files->count). +->file_lock protected accesses to all the file related fields +of the table. 
->count was used for sharing the file descriptor +table between tasks cloned with CLONE_FILES flag. Typically +this would be the case for posix threads. As with the common +refcounting model in the kernel, the last task doing +a put_files_struct() frees the file descriptor (fd) table. +The files (struct file) themselves are protected using +reference count (->f_count). + +In the new lock-free model of file descriptor management, +the reference counting is similar, but the locking is +based on RCU. The file descriptor table contains multiple +elements - the fd sets (open_fds and close_on_exec, the +array of file pointers, the sizes of the sets and the array +etc.). In order for the updates to appear atomic to +a lock-free reader, all the elements of the file descriptor +table are in a separate structure - struct fdtable. +files_struct contains a pointer to struct fdtable through +which the actual fd table is accessed. Initially the +fdtable is embedded in files_struct itself. On a subsequent +expansion of fdtable, a new fdtable structure is allocated +and files->fdtab points to the new structure. The fdtable +structure is freed with RCU and lock-free readers either +see the old fdtable or the new fdtable making the update +appear atomic. Here are the locking rules for +the fdtable structure - + +1. All references to the fdtable must be done through + the files_fdtable() macro:: + + struct fdtable *fdt; + + rcu_read_lock(); + + fdt = files_fdtable(files); + .... + if (n <= fdt->max_fds) + .... + ... + rcu_read_unlock(); + + files_fdtable() uses rcu_dereference() macro which takes care of + the memory barrier requirements for lock-free dereference. + The fdtable pointer must be read within the read-side + critical section. + +2. Reading of the fdtable as described above must be protected + by rcu_read_lock()/rcu_read_unlock(). + +3. For any update to the fd table, files->file_lock must + be held. + +4. 
To look up the file structure given an fd, a reader + must use either fcheck() or fcheck_files() APIs. These + take care of barrier requirements due to lock-free lookup. + + An example:: + + struct file *file; + + rcu_read_lock(); + file = fcheck(fd); + if (file) { + ... + } + .... + rcu_read_unlock(); + +5. Handling of the file structures is special. Since the look-up + of the fd (fget()/fget_light()) are lock-free, it is possible + that look-up may race with the last put() operation on the + file structure. This is avoided using atomic_long_inc_not_zero() + on ->f_count:: + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (atomic_long_inc_not_zero(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; + } + rcu_read_unlock(); + .... + return file; + + atomic_long_inc_not_zero() detects if refcounts is already zero or + goes to zero during increment. If it does, we fail + fget()/fget_light(). + +6. Since both fdtable and file structures can be looked up + lock-free, they must be installed using rcu_assign_pointer() + API. If they are looked up lock-free, rcu_dereference() + must be used. However it is advisable to use files_fdtable() + and fcheck()/fcheck_files() which take care of these issues. + +7. While updating, the fdtable pointer must be looked up while + holding files->file_lock. If ->file_lock is dropped, then + another thread expand the files thereby creating a new + fdtable and making the earlier fdtable pointer stale. + + For example:: + + spin_lock(&files->file_lock); + fd = locate_fd(files, file, start); + if (fd >= 0) { + /* locate_fd() may have expanded fdtable, load the ptr */ + fdt = files_fdtable(files); + __set_open_fd(fd, fdt); + __clear_close_on_exec(fd, fdt); + spin_unlock(&files->file_lock); + ..... + + Since locate_fd() can drop ->file_lock (and reacquire ->file_lock), + the fdtable pointer (fdt) must be loaded after locate_fd(). 
+ diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt deleted file mode 100644 index 46dfc6b038c3..000000000000 --- a/Documentation/filesystems/files.txt +++ /dev/null @@ -1,123 +0,0 @@ -File management in the Linux kernel ------------------------------------ - -This document describes how locking for files (struct file) -and file descriptor table (struct files) works. - -Up until 2.6.12, the file descriptor table has been protected -with a lock (files->file_lock) and reference count (files->count). -->file_lock protected accesses to all the file related fields -of the table. ->count was used for sharing the file descriptor -table between tasks cloned with CLONE_FILES flag. Typically -this would be the case for posix threads. As with the common -refcounting model in the kernel, the last task doing -a put_files_struct() frees the file descriptor (fd) table. -The files (struct file) themselves are protected using -reference count (->f_count). - -In the new lock-free model of file descriptor management, -the reference counting is similar, but the locking is -based on RCU. The file descriptor table contains multiple -elements - the fd sets (open_fds and close_on_exec, the -array of file pointers, the sizes of the sets and the array -etc.). In order for the updates to appear atomic to -a lock-free reader, all the elements of the file descriptor -table are in a separate structure - struct fdtable. -files_struct contains a pointer to struct fdtable through -which the actual fd table is accessed. Initially the -fdtable is embedded in files_struct itself. On a subsequent -expansion of fdtable, a new fdtable structure is allocated -and files->fdtab points to the new structure. The fdtable -structure is freed with RCU and lock-free readers either -see the old fdtable or the new fdtable making the update -appear atomic. Here are the locking rules for -the fdtable structure - - -1. 
All references to the fdtable must be done through - the files_fdtable() macro : - - struct fdtable *fdt; - - rcu_read_lock(); - - fdt = files_fdtable(files); - .... - if (n <= fdt->max_fds) - .... - ... - rcu_read_unlock(); - - files_fdtable() uses rcu_dereference() macro which takes care of - the memory barrier requirements for lock-free dereference. - The fdtable pointer must be read within the read-side - critical section. - -2. Reading of the fdtable as described above must be protected - by rcu_read_lock()/rcu_read_unlock(). - -3. For any update to the fd table, files->file_lock must - be held. - -4. To look up the file structure given an fd, a reader - must use either fcheck() or fcheck_files() APIs. These - take care of barrier requirements due to lock-free lookup. - An example : - - struct file *file; - - rcu_read_lock(); - file = fcheck(fd); - if (file) { - ... - } - .... - rcu_read_unlock(); - -5. Handling of the file structures is special. Since the look-up - of the fd (fget()/fget_light()) are lock-free, it is possible - that look-up may race with the last put() operation on the - file structure. This is avoided using atomic_long_inc_not_zero() - on ->f_count : - - rcu_read_lock(); - file = fcheck_files(files, fd); - if (file) { - if (atomic_long_inc_not_zero(&file->f_count)) - *fput_needed = 1; - else - /* Didn't get the reference, someone's freed */ - file = NULL; - } - rcu_read_unlock(); - .... - return file; - - atomic_long_inc_not_zero() detects if refcounts is already zero or - goes to zero during increment. If it does, we fail - fget()/fget_light(). - -6. Since both fdtable and file structures can be looked up - lock-free, they must be installed using rcu_assign_pointer() - API. If they are looked up lock-free, rcu_dereference() - must be used. However it is advisable to use files_fdtable() - and fcheck()/fcheck_files() which take care of these issues. - -7. While updating, the fdtable pointer must be looked up while - holding files->file_lock. 
If ->file_lock is dropped, then - another thread expand the files thereby creating a new - fdtable and making the earlier fdtable pointer stale. - For example : - - spin_lock(&files->file_lock); - fd = locate_fd(files, file, start); - if (fd >= 0) { - /* locate_fd() may have expanded fdtable, load the ptr */ - fdt = files_fdtable(files); - __set_open_fd(fd, fdt); - __clear_close_on_exec(fd, fdt); - spin_unlock(&files->file_lock); - ..... - - Since locate_fd() can drop ->file_lock (and reacquire ->file_lock), - the fdtable pointer (fdt) must be loaded after locate_fd(). - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 99b450260e4d..597ea3ed415b 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -27,6 +27,7 @@ algorithms work. devpts dnotify fiemap + files automount-support -- cgit v1.2.3 From ba302d2a8ef0917a6d1c3775d1b4b9f2b26c8ef5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:06 +0200 Subject: docs: filesystems: convert fuse-io.txt to ReST - Add a SPDX header; - Add a document title; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/88ec8025c1c5fc3ac5b65f1151c41ebcc696dc0e.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/fuse-io.rst | 44 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/fuse-io.txt | 38 ------------------------------ Documentation/filesystems/index.rst | 1 + 3 files changed, 45 insertions(+), 38 deletions(-) create mode 100644 Documentation/filesystems/fuse-io.rst delete mode 100644 Documentation/filesystems/fuse-io.txt diff --git a/Documentation/filesystems/fuse-io.rst b/Documentation/filesystems/fuse-io.rst new file mode 100644 index 000000000000..255a368fe534 --- /dev/null +++ b/Documentation/filesystems/fuse-io.rst @@ -0,0 +1,44 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +============== +Fuse I/O Modes +============== + +Fuse supports the following I/O modes: + +- direct-io +- cached + + write-through + + writeback-cache + +The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the +FUSE_OPEN reply. + +In direct-io mode the page cache is completely bypassed for reads and writes. +No read-ahead takes place. Shared mmap is disabled. + +In cached mode reads may be satisfied from the page cache, and data may be +read-ahead by the kernel to fill the cache. The cache is always kept consistent +after any writes to the file. All mmap modes are supported. + +The cached mode has two sub modes controlling how writes are handled. The +write-through mode is the default and is supported on all kernels. The +writeback-cache mode may be selected by the FUSE_WRITEBACK_CACHE flag in the +FUSE_INIT reply. + +In write-through mode each write is immediately sent to userspace as one or more +WRITE requests, as well as updating any cached pages (and caching previously +uncached, but fully written pages). No READ requests are ever sent for writes, +so when an uncached page is partially written, the page is discarded. + +In writeback-cache mode (enabled by the FUSE_WRITEBACK_CACHE flag) writes go to +the cache only, which means that the write(2) syscall can often complete very +fast. Dirty pages are written back implicitly (background writeback or page +reclaim on memory pressure) or explicitly (invoked by close(2), fsync(2) and +when the last ref to the file is being released on munmap(2)). This mode +assumes that all changes to the filesystem go through the FUSE kernel module +(size and atime/ctime/mtime attributes are kept up-to-date by the kernel), so +it's generally not suitable for network filesystems. If a partial page is +written, then the page needs to be first read from userspace. This means, that +even for files opened for O_WRONLY it is possible that READ requests will be +generated by the kernel. 
diff --git a/Documentation/filesystems/fuse-io.txt b/Documentation/filesystems/fuse-io.txt deleted file mode 100644 index 07b8f73f100f..000000000000 --- a/Documentation/filesystems/fuse-io.txt +++ /dev/null @@ -1,38 +0,0 @@ -Fuse supports the following I/O modes: - -- direct-io -- cached - + write-through - + writeback-cache - -The direct-io mode can be selected with the FOPEN_DIRECT_IO flag in the -FUSE_OPEN reply. - -In direct-io mode the page cache is completely bypassed for reads and writes. -No read-ahead takes place. Shared mmap is disabled. - -In cached mode reads may be satisfied from the page cache, and data may be -read-ahead by the kernel to fill the cache. The cache is always kept consistent -after any writes to the file. All mmap modes are supported. - -The cached mode has two sub modes controlling how writes are handled. The -write-through mode is the default and is supported on all kernels. The -writeback-cache mode may be selected by the FUSE_WRITEBACK_CACHE flag in the -FUSE_INIT reply. - -In write-through mode each write is immediately sent to userspace as one or more -WRITE requests, as well as updating any cached pages (and caching previously -uncached, but fully written pages). No READ requests are ever sent for writes, -so when an uncached page is partially written, the page is discarded. - -In writeback-cache mode (enabled by the FUSE_WRITEBACK_CACHE flag) writes go to -the cache only, which means that the write(2) syscall can often complete very -fast. Dirty pages are written back implicitly (background writeback or page -reclaim on memory pressure) or explicitly (invoked by close(2), fsync(2) and -when the last ref to the file is being released on munmap(2)). This mode -assumes that all changes to the filesystem go through the FUSE kernel module -(size and atime/ctime/mtime attributes are kept up-to-date by the kernel), so -it's generally not suitable for network filesystems. 
If a partial page is -written, then the page needs to be first read from userspace. This means, that -even for files opened for O_WRONLY it is possible that READ requests will be -generated by the kernel. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 597ea3ed415b..ace804c4c3ab 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -83,6 +83,7 @@ Documentation for filesystem implementations. hfsplus hpfs fuse + fuse-io inotify isofs nilfs2 -- cgit v1.2.3 From 63526525dd7a9c85786ff9f9592b6f4281ad6171 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:07 +0200 Subject: docs: filesystems: convert locks.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/467b3f20e63d2640d22599b99229699b5fb79251.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/locks.rst | 72 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/locks.txt | 68 ----------------------------------- 3 files changed, 73 insertions(+), 68 deletions(-) create mode 100644 Documentation/filesystems/locks.rst delete mode 100644 Documentation/filesystems/locks.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index ace804c4c3ab..64535f57c3ac 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -28,6 +28,7 @@ algorithms work. dnotify fiemap files + locks automount-support diff --git a/Documentation/filesystems/locks.rst b/Documentation/filesystems/locks.rst new file mode 100644 index 000000000000..10f67fb9ce07 --- /dev/null +++ b/Documentation/filesystems/locks.rst @@ -0,0 +1,72 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +========================== +File Locking Release Notes +========================== + + Andy Walker + + 12 May 1997 + + +1. What's New? +============== + +1.1 Broken Flock Emulation +-------------------------- + +The old flock(2) emulation in the kernel was swapped for proper BSD +compatible flock(2) support in the 1.3.x series of kernels. With the +release of the 2.1.x kernel series, support for the old emulation has +been totally removed, so that we don't need to carry this baggage +forever. + +This should not cause problems for anybody, since everybody using a +2.1.x kernel should have updated their C library to a suitable version +anyway (see the file "Documentation/process/changes.rst".) + +1.2 Allow Mixed Locks Again +--------------------------- + +1.2.1 Typical Problems - Sendmail +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Because sendmail was unable to use the old flock() emulation, many sendmail +installations use fcntl() instead of flock(). This is true of Slackware 3.0 +for example. This gave rise to some other subtle problems if sendmail was +configured to rebuild the alias file. Sendmail tried to lock the aliases.dir +file with fcntl() at the same time as the GDBM routines tried to lock this +file with flock(). With pre 1.3.96 kernels this could result in deadlocks that, +over time, or under a very heavy mail load, would eventually cause the kernel +to lock solid with deadlocked processes. + + +1.2.2 The Solution +^^^^^^^^^^^^^^^^^^ +The solution I have chosen, after much experimentation and discussion, +is to make flock() and fcntl() locks oblivious to each other. Both can +exists, and neither will have any effect on the other. + +I wanted the two lock styles to be cooperative, but there were so many +race and deadlock conditions that the current solution was the only +practical one. It puts us in the same position as, for example, SunOS +4.1.x and several other commercial Unices. 
The only OS's that support +cooperative flock()/fcntl() are those that emulate flock() using +fcntl(), with all the problems that implies. + + +1.3 Mandatory Locking As A Mount Option +--------------------------------------- + +Mandatory locking, as described in +'Documentation/filesystems/mandatory-locking.txt' was prior to this release a +general configuration option that was valid for all mounted filesystems. This +had a number of inherent dangers, not the least of which was the ability to +freeze an NFS server by asking it to read a file for which a mandatory lock +existed. + +From this release of the kernel, mandatory locking can be turned on and off +on a per-filesystem basis, using the mount options 'mand' and 'nomand'. +The default is to disallow mandatory locking. The intention is that +mandatory locking only be enabled on a local filesystem as the specific need +arises. + diff --git a/Documentation/filesystems/locks.txt b/Documentation/filesystems/locks.txt deleted file mode 100644 index 5368690f412e..000000000000 --- a/Documentation/filesystems/locks.txt +++ /dev/null @@ -1,68 +0,0 @@ - File Locking Release Notes - - Andy Walker - - 12 May 1997 - - -1. What's New? --------------- - -1.1 Broken Flock Emulation --------------------------- - -The old flock(2) emulation in the kernel was swapped for proper BSD -compatible flock(2) support in the 1.3.x series of kernels. With the -release of the 2.1.x kernel series, support for the old emulation has -been totally removed, so that we don't need to carry this baggage -forever. - -This should not cause problems for anybody, since everybody using a -2.1.x kernel should have updated their C library to a suitable version -anyway (see the file "Documentation/process/changes.rst".) 
- -1.2 Allow Mixed Locks Again ---------------------------- - -1.2.1 Typical Problems - Sendmail ---------------------------------- -Because sendmail was unable to use the old flock() emulation, many sendmail -installations use fcntl() instead of flock(). This is true of Slackware 3.0 -for example. This gave rise to some other subtle problems if sendmail was -configured to rebuild the alias file. Sendmail tried to lock the aliases.dir -file with fcntl() at the same time as the GDBM routines tried to lock this -file with flock(). With pre 1.3.96 kernels this could result in deadlocks that, -over time, or under a very heavy mail load, would eventually cause the kernel -to lock solid with deadlocked processes. - - -1.2.2 The Solution ------------------- -The solution I have chosen, after much experimentation and discussion, -is to make flock() and fcntl() locks oblivious to each other. Both can -exists, and neither will have any effect on the other. - -I wanted the two lock styles to be cooperative, but there were so many -race and deadlock conditions that the current solution was the only -practical one. It puts us in the same position as, for example, SunOS -4.1.x and several other commercial Unices. The only OS's that support -cooperative flock()/fcntl() are those that emulate flock() using -fcntl(), with all the problems that implies. - - -1.3 Mandatory Locking As A Mount Option ---------------------------------------- - -Mandatory locking, as described in -'Documentation/filesystems/mandatory-locking.txt' was prior to this release a -general configuration option that was valid for all mounted filesystems. This -had a number of inherent dangers, not the least of which was the ability to -freeze an NFS server by asking it to read a file for which a mandatory lock -existed. - -From this release of the kernel, mandatory locking can be turned on and off -on a per-filesystem basis, using the mount options 'mand' and 'nomand'. 
-The default is to disallow mandatory locking. The intention is that -mandatory locking only be enabled on a local filesystem as the specific need -arises. - -- cgit v1.2.3 From a02dcdf65bcfca1068e9c6d2cbafdab724cfbce9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:08 +0200 Subject: docs: filesystems: convert mandatory-locking.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Use notes markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/aecd6259fe9f99b2c2b3440eab6a2b989125e00d.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/locks.rst | 2 +- Documentation/filesystems/mandatory-locking.rst | 188 ++++++++++++++++++++++++ Documentation/filesystems/mandatory-locking.txt | 181 ----------------------- fs/locks.c | 2 +- 5 files changed, 191 insertions(+), 183 deletions(-) create mode 100644 Documentation/filesystems/mandatory-locking.rst delete mode 100644 Documentation/filesystems/mandatory-locking.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 64535f57c3ac..df1a474ad9b9 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -29,6 +29,7 @@ algorithms work. fiemap files locks + mandatory-locking automount-support diff --git a/Documentation/filesystems/locks.rst b/Documentation/filesystems/locks.rst index 10f67fb9ce07..c5ae858b1aac 100644 --- a/Documentation/filesystems/locks.rst +++ b/Documentation/filesystems/locks.rst @@ -58,7 +58,7 @@ fcntl(), with all the problems that implies. 
--------------------------------------- Mandatory locking, as described in -'Documentation/filesystems/mandatory-locking.txt' was prior to this release a +'Documentation/filesystems/mandatory-locking.rst' was prior to this release a general configuration option that was valid for all mounted filesystems. This had a number of inherent dangers, not the least of which was the ability to freeze an NFS server by asking it to read a file for which a mandatory lock diff --git a/Documentation/filesystems/mandatory-locking.rst b/Documentation/filesystems/mandatory-locking.rst new file mode 100644 index 000000000000..9ce73544a8f0 --- /dev/null +++ b/Documentation/filesystems/mandatory-locking.rst @@ -0,0 +1,188 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================================== +Mandatory File Locking For The Linux Operating System +===================================================== + + Andy Walker + + 15 April 1996 + + (Updated September 2007) + +0. Why you should avoid mandatory locking +----------------------------------------- + +The Linux implementation is prey to a number of difficult-to-fix race +conditions which in practice make it not dependable: + + - The write system call checks for a mandatory lock only once + at its start. It is therefore possible for a lock request to + be granted after this check but before the data is modified. + A process may then see file data change even while a mandatory + lock was held. + - Similarly, an exclusive lock may be granted on a file after + the kernel has decided to proceed with a read, but before the + read has actually completed, and the reading process may see + the file data in a state which should not have been visible + to it. + - Similar races make the claimed mutual exclusion between lock + and mmap similarly unreliable. + +1. What is mandatory locking? 
+------------------------------ + +Mandatory locking is kernel enforced file locking, as opposed to the more usual +cooperative file locking used to guarantee sequential access to files among +processes. File locks are applied using the flock() and fcntl() system calls +(and the lockf() library routine which is a wrapper around fcntl().) It is +normally a process' responsibility to check for locks on a file it wishes to +update, before applying its own lock, updating the file and unlocking it again. +The most commonly used example of this (and in the case of sendmail, the most +troublesome) is access to a user's mailbox. The mail user agent and the mail +transfer agent must guard against updating the mailbox at the same time, and +prevent reading the mailbox while it is being updated. + +In a perfect world all processes would use and honour a cooperative, or +"advisory" locking scheme. However, the world isn't perfect, and there's +a lot of poorly written code out there. + +In trying to address this problem, the designers of System V UNIX came up +with a "mandatory" locking scheme, whereby the operating system kernel would +block attempts by a process to write to a file that another process holds a +"read" -or- "shared" lock on, and block attempts to both read and write to a +file that a process holds a "write " -or- "exclusive" lock on. + +The System V mandatory locking scheme was intended to have as little impact as +possible on existing user code. The scheme is based on marking individual files +as candidates for mandatory locking, and using the existing fcntl()/lockf() +interface for applying locks just as if they were normal, advisory locks. + +.. Note:: + + 1. In saying "file" in the paragraphs above I am actually not telling + the whole truth. System V locking is based on fcntl(). 
The granularity of + fcntl() is such that it allows the locking of byte ranges in files, in + addition to entire files, so the mandatory locking rules also have byte + level granularity. + + 2. POSIX.1 does not specify any scheme for mandatory locking, despite + borrowing the fcntl() locking scheme from System V. The mandatory locking + scheme is defined by the System V Interface Definition (SVID) Version 3. + +2. Marking a file for mandatory locking +--------------------------------------- + +A file is marked as a candidate for mandatory locking by setting the group-id +bit in its file mode but removing the group-execute bit. This is an otherwise +meaningless combination, and was chosen by the System V implementors so as not +to break existing user programs. + +Note that the group-id bit is usually automatically cleared by the kernel when +a setgid file is written to. This is a security measure. The kernel has been +modified to recognize the special case of a mandatory lock candidate and to +refrain from clearing this bit. Similarly the kernel has been modified not +to run mandatory lock candidates with setgid privileges. + +3. Available implementations +---------------------------- + +I have considered the implementations of mandatory locking available with +SunOS 4.1.x, Solaris 2.x and HP-UX 9.x. + +Generally I have tried to make the most sense out of the behaviour exhibited +by these three reference systems. There are many anomalies. + +All the reference systems reject all calls to open() for a file on which +another process has outstanding mandatory locks. This is in direct +contravention of SVID 3, which states that only calls to open() with the +O_TRUNC flag set should be rejected. The Linux implementation follows the SVID +definition, which is the "Right Thing", since only calls with O_TRUNC can +modify the contents of the file. + +HP-UX even disallows open() with O_TRUNC for a file with advisory locks, not +just mandatory locks. 
That would appear to contravene POSIX.1. + +mmap() is another interesting case. All the operating systems mentioned +prevent mandatory locks from being applied to an mmap()'ed file, but HP-UX +also disallows advisory locks for such a file. SVID actually specifies the +paranoid HP-UX behaviour. + +In my opinion only MAP_SHARED mappings should be immune from locking, and then +only from mandatory locks - that is what is currently implemented. + +SunOS is so hopeless that it doesn't even honour the O_NONBLOCK flag for +mandatory locks, so reads and writes to locked files always block when they +should return EAGAIN. + +I'm afraid that this is such an esoteric area that the semantics described +below are just as valid as any others, so long as the main points seem to +agree. + +4. Semantics +------------ + +1. Mandatory locks can only be applied via the fcntl()/lockf() locking + interface - in other words the System V/POSIX interface. BSD style + locks using flock() never result in a mandatory lock. + +2. If a process has locked a region of a file with a mandatory read lock, then + other processes are permitted to read from that region. If any of these + processes attempts to write to the region it will block until the lock is + released, unless the process has opened the file with the O_NONBLOCK + flag in which case the system call will return immediately with the error + status EAGAIN. + +3. If a process has locked a region of a file with a mandatory write lock, all + attempts to read or write to that region block until the lock is released, + unless a process has opened the file with the O_NONBLOCK flag in which case + the system call will return immediately with the error status EAGAIN. + +4. Calls to open() with O_TRUNC, or to creat(), on an existing file that has + any mandatory locks owned by other processes will be rejected with the + error status EAGAIN. + +5. 
Attempts to apply a mandatory lock to a file that is memory mapped and + shared (via mmap() with MAP_SHARED) will be rejected with the error status + EAGAIN. + +6. Attempts to create a shared memory map of a file (via mmap() with MAP_SHARED) + that has any mandatory locks in effect will be rejected with the error status + EAGAIN. + +5. Which system calls are affected? +----------------------------------- + +Those which modify a file's contents, not just the inode. That gives read(), +write(), readv(), writev(), open(), creat(), mmap(), truncate() and +ftruncate(). truncate() and ftruncate() are considered to be "write" actions +for the purposes of mandatory locking. + +The affected region is usually defined as stretching from the current position +for the total number of bytes read or written. For the truncate calls it is +defined as the bytes of a file removed or added (we must also consider bytes +added, as a lock can specify just "the whole file", rather than a specific +range of bytes.) + +Note 3: I may have overlooked some system calls that need mandatory lock +checking in my eagerness to get this code out the door. Please let me know, or +better still fix the system calls yourself and submit a patch to me or Linus. + +6. Warning! +----------- + +Not even root can override a mandatory lock, so runaway processes can wreak +havoc if they lock crucial files. The way around it is to change the file +permissions (remove the setgid bit) before trying to read or write to it. +Of course, that might be a bit tricky if the system is hung :-( + +7. The "mand" mount option +-------------------------- +Mandatory locking is disabled on all filesystems by default, and must be +administratively enabled by mounting with "-o mand". That mount option +is only allowed if the mounting task has the CAP_SYS_ADMIN capability. + +Since kernel v4.5, it is possible to disable mandatory locking +altogether by setting CONFIG_MANDATORY_FILE_LOCKING to "n". 
A kernel +with this disabled will reject attempts to mount filesystems with the +"mand" mount option with the error status EPERM. diff --git a/Documentation/filesystems/mandatory-locking.txt b/Documentation/filesystems/mandatory-locking.txt deleted file mode 100644 index a251ca33164a..000000000000 --- a/Documentation/filesystems/mandatory-locking.txt +++ /dev/null @@ -1,181 +0,0 @@ - Mandatory File Locking For The Linux Operating System - - Andy Walker - - 15 April 1996 - (Updated September 2007) - -0. Why you should avoid mandatory locking ------------------------------------------ - -The Linux implementation is prey to a number of difficult-to-fix race -conditions which in practice make it not dependable: - - - The write system call checks for a mandatory lock only once - at its start. It is therefore possible for a lock request to - be granted after this check but before the data is modified. - A process may then see file data change even while a mandatory - lock was held. - - Similarly, an exclusive lock may be granted on a file after - the kernel has decided to proceed with a read, but before the - read has actually completed, and the reading process may see - the file data in a state which should not have been visible - to it. - - Similar races make the claimed mutual exclusion between lock - and mmap similarly unreliable. - -1. What is mandatory locking? ------------------------------- - -Mandatory locking is kernel enforced file locking, as opposed to the more usual -cooperative file locking used to guarantee sequential access to files among -processes. File locks are applied using the flock() and fcntl() system calls -(and the lockf() library routine which is a wrapper around fcntl().) It is -normally a process' responsibility to check for locks on a file it wishes to -update, before applying its own lock, updating the file and unlocking it again. 
-The most commonly used example of this (and in the case of sendmail, the most -troublesome) is access to a user's mailbox. The mail user agent and the mail -transfer agent must guard against updating the mailbox at the same time, and -prevent reading the mailbox while it is being updated. - -In a perfect world all processes would use and honour a cooperative, or -"advisory" locking scheme. However, the world isn't perfect, and there's -a lot of poorly written code out there. - -In trying to address this problem, the designers of System V UNIX came up -with a "mandatory" locking scheme, whereby the operating system kernel would -block attempts by a process to write to a file that another process holds a -"read" -or- "shared" lock on, and block attempts to both read and write to a -file that a process holds a "write " -or- "exclusive" lock on. - -The System V mandatory locking scheme was intended to have as little impact as -possible on existing user code. The scheme is based on marking individual files -as candidates for mandatory locking, and using the existing fcntl()/lockf() -interface for applying locks just as if they were normal, advisory locks. - -Note 1: In saying "file" in the paragraphs above I am actually not telling -the whole truth. System V locking is based on fcntl(). The granularity of -fcntl() is such that it allows the locking of byte ranges in files, in addition -to entire files, so the mandatory locking rules also have byte level -granularity. - -Note 2: POSIX.1 does not specify any scheme for mandatory locking, despite -borrowing the fcntl() locking scheme from System V. The mandatory locking -scheme is defined by the System V Interface Definition (SVID) Version 3. - -2. Marking a file for mandatory locking ---------------------------------------- - -A file is marked as a candidate for mandatory locking by setting the group-id -bit in its file mode but removing the group-execute bit. 
This is an otherwise -meaningless combination, and was chosen by the System V implementors so as not -to break existing user programs. - -Note that the group-id bit is usually automatically cleared by the kernel when -a setgid file is written to. This is a security measure. The kernel has been -modified to recognize the special case of a mandatory lock candidate and to -refrain from clearing this bit. Similarly the kernel has been modified not -to run mandatory lock candidates with setgid privileges. - -3. Available implementations ----------------------------- - -I have considered the implementations of mandatory locking available with -SunOS 4.1.x, Solaris 2.x and HP-UX 9.x. - -Generally I have tried to make the most sense out of the behaviour exhibited -by these three reference systems. There are many anomalies. - -All the reference systems reject all calls to open() for a file on which -another process has outstanding mandatory locks. This is in direct -contravention of SVID 3, which states that only calls to open() with the -O_TRUNC flag set should be rejected. The Linux implementation follows the SVID -definition, which is the "Right Thing", since only calls with O_TRUNC can -modify the contents of the file. - -HP-UX even disallows open() with O_TRUNC for a file with advisory locks, not -just mandatory locks. That would appear to contravene POSIX.1. - -mmap() is another interesting case. All the operating systems mentioned -prevent mandatory locks from being applied to an mmap()'ed file, but HP-UX -also disallows advisory locks for such a file. SVID actually specifies the -paranoid HP-UX behaviour. - -In my opinion only MAP_SHARED mappings should be immune from locking, and then -only from mandatory locks - that is what is currently implemented. - -SunOS is so hopeless that it doesn't even honour the O_NONBLOCK flag for -mandatory locks, so reads and writes to locked files always block when they -should return EAGAIN. 
- -I'm afraid that this is such an esoteric area that the semantics described -below are just as valid as any others, so long as the main points seem to -agree. - -4. Semantics ------------- - -1. Mandatory locks can only be applied via the fcntl()/lockf() locking - interface - in other words the System V/POSIX interface. BSD style - locks using flock() never result in a mandatory lock. - -2. If a process has locked a region of a file with a mandatory read lock, then - other processes are permitted to read from that region. If any of these - processes attempts to write to the region it will block until the lock is - released, unless the process has opened the file with the O_NONBLOCK - flag in which case the system call will return immediately with the error - status EAGAIN. - -3. If a process has locked a region of a file with a mandatory write lock, all - attempts to read or write to that region block until the lock is released, - unless a process has opened the file with the O_NONBLOCK flag in which case - the system call will return immediately with the error status EAGAIN. - -4. Calls to open() with O_TRUNC, or to creat(), on a existing file that has - any mandatory locks owned by other processes will be rejected with the - error status EAGAIN. - -5. Attempts to apply a mandatory lock to a file that is memory mapped and - shared (via mmap() with MAP_SHARED) will be rejected with the error status - EAGAIN. - -6. Attempts to create a shared memory map of a file (via mmap() with MAP_SHARED) - that has any mandatory locks in effect will be rejected with the error status - EAGAIN. - -5. Which system calls are affected? ------------------------------------ - -Those which modify a file's contents, not just the inode. That gives read(), -write(), readv(), writev(), open(), creat(), mmap(), truncate() and -ftruncate(). truncate() and ftruncate() are considered to be "write" actions -for the purposes of mandatory locking. 
- -The affected region is usually defined as stretching from the current position -for the total number of bytes read or written. For the truncate calls it is -defined as the bytes of a file removed or added (we must also consider bytes -added, as a lock can specify just "the whole file", rather than a specific -range of bytes.) - -Note 3: I may have overlooked some system calls that need mandatory lock -checking in my eagerness to get this code out the door. Please let me know, or -better still fix the system calls yourself and submit a patch to me or Linus. - -6. Warning! ------------ - -Not even root can override a mandatory lock, so runaway processes can wreak -havoc if they lock crucial files. The way around it is to change the file -permissions (remove the setgid bit) before trying to read or write to it. -Of course, that might be a bit tricky if the system is hung :-( - -7. The "mand" mount option --------------------------- -Mandatory locking is disabled on all filesystems by default, and must be -administratively enabled by mounting with "-o mand". That mount option -is only allowed if the mounting task has the CAP_SYS_ADMIN capability. - -Since kernel v4.5, it is possible to disable mandatory locking -altogether by setting CONFIG_MANDATORY_FILE_LOCKING to "n". A kernel -with this disabled will reject attempts to mount filesystems with the -"mand" mount option with the error status EPERM. diff --git a/fs/locks.c b/fs/locks.c index b8a31c1c4fff..1d4f4d5da704 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -61,7 +61,7 @@ * * Initial implementation of mandatory locks. SunOS turned out to be * a rotten model, so I implemented the "obvious" semantics. - * See 'Documentation/filesystems/mandatory-locking.txt' for details. + * See 'Documentation/filesystems/mandatory-locking.rst' for details. * Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996. * * Don't allow mandatory locks on mmap()'ed files. 
Added simple functions to -- cgit v1.2.3 From 791a17ee19736fc6d4e35c4bf9f8efd1001be77c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:09 +0200 Subject: docs: filesystems: convert mount_api.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/32332c1659a28c22561cb5e64162c959856066b4.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/mount_api.rst | 825 ++++++++++++++++++++++++++++++++ Documentation/filesystems/mount_api.txt | 724 ---------------------------- include/linux/fs_context.h | 2 +- include/linux/lsm_hooks.h | 2 +- 5 files changed, 828 insertions(+), 726 deletions(-) create mode 100644 Documentation/filesystems/mount_api.rst delete mode 100644 Documentation/filesystems/mount_api.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index df1a474ad9b9..4532b4d2631f 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -30,6 +30,7 @@ algorithms work. files locks mandatory-locking + mount_api automount-support diff --git a/Documentation/filesystems/mount_api.rst b/Documentation/filesystems/mount_api.rst new file mode 100644 index 000000000000..dea22d64f060 --- /dev/null +++ b/Documentation/filesystems/mount_api.rst @@ -0,0 +1,825 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +Filesystem Mount API +==================== + +.. CONTENTS + + (1) Overview. + + (2) The filesystem context. + + (3) The filesystem context operations. + + (4) Filesystem context security. + + (5) VFS filesystem context API. + + (6) Superblock creation helpers. + + (7) Parameter description. + + (8) Parameter helper functions. 
+ + +Overview +======== + +The creation of new mounts is now to be done in a multistep process: + + (1) Create a filesystem context. + + (2) Parse the parameters and attach them to the context. Parameters are + expected to be passed individually from userspace, though legacy binary + parameters can also be handled. + + (3) Validate and pre-process the context. + + (4) Get or create a superblock and mountable root. + + (5) Perform the mount. + + (6) Return an error message attached to the context. + + (7) Destroy the context. + +To support this, the file_system_type struct gains two new fields:: + + int (*init_fs_context)(struct fs_context *fc); + const struct fs_parameter_description *parameters; + +The first is invoked to set up the filesystem-specific parts of a filesystem +context, including the additional space, and the second points to the +parameter description for validation at registration time and querying by a +future system call. + +Note that security initialisation is done *after* the filesystem is called so +that the namespaces may be adjusted first. + + +The Filesystem context +====================== + +The creation and reconfiguration of a superblock is governed by a filesystem +context. This is represented by the fs_context structure:: + + struct fs_context { + const struct fs_context_operations *ops; + struct file_system_type *fs_type; + void *fs_private; + struct dentry *root; + struct user_namespace *user_ns; + struct net *net_ns; + const struct cred *cred; + char *source; + char *subtype; + void *security; + void *s_fs_info; + unsigned int sb_flags; + unsigned int sb_flags_mask; + unsigned int s_iflags; + unsigned int lsm_flags; + enum fs_context_purpose purpose:8; + ... + }; + +The fs_context fields are as follows: + + * :: + + const struct fs_context_operations *ops + + These are operations that can be done on a filesystem context (see + below). This must be set by the ->init_fs_context() file_system_type + operation. 
+ + * :: + + struct file_system_type *fs_type + + A pointer to the file_system_type of the filesystem that is being + constructed or reconfigured. This retains a reference on the type owner. + + * :: + + void *fs_private + + A pointer to the file system's private data. This is where the filesystem + will need to store any options it parses. + + * :: + + struct dentry *root + + A pointer to the root of the mountable tree (and indirectly, the + superblock thereof). This is filled in by the ->get_tree() op. If this + is set, an active reference on root->d_sb must also be held. + + * :: + + struct user_namespace *user_ns + struct net *net_ns + + There are a subset of the namespaces in use by the invoking process. They + retain references on each namespace. The subscribed namespaces may be + replaced by the filesystem to reflect other sources, such as the parent + mount superblock on an automount. + + * :: + + const struct cred *cred + + The mounter's credentials. This retains a reference on the credentials. + + * :: + + char *source + + This specifies the source. It may be a block device (e.g. /dev/sda1) or + something more exotic, such as the "host:/path" that NFS desires. + + * :: + + char *subtype + + This is a string to be added to the type displayed in /proc/mounts to + qualify it (used by FUSE). This is available for the filesystem to set if + desired. + + * :: + + void *security + + A place for the LSMs to hang their security data for the superblock. The + relevant security operations are described below. + + * :: + + void *s_fs_info + + The proposed s_fs_info for a new superblock, set in the superblock by + sget_fc(). This can be used to distinguish superblocks. + + * :: + + unsigned int sb_flags + unsigned int sb_flags_mask + + Which bits SB_* flags are to be set/cleared in super_block::s_flags. + + * :: + + unsigned int s_iflags + + These will be bitwise-OR'd with s->s_iflags when a superblock is created. 
+ + * :: + + enum fs_context_purpose + + This indicates the purpose for which the context is intended. The + available values are: + + ========================== ====================================== + FS_CONTEXT_FOR_MOUNT New superblock for explicit mount + FS_CONTEXT_FOR_SUBMOUNT New automatic submount of extant mount + FS_CONTEXT_FOR_RECONFIGURE Change an existing mount + ========================== ====================================== + +The mount context is created by calling vfs_new_fs_context() or +vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the +structure is not refcounted. + +VFS, security and filesystem mount options are set individually with +vfs_parse_mount_option(). Options provided by the old mount(2) system call as +a page of data can be parsed with generic_parse_monolithic(). + +When mounting, the filesystem is allowed to take data from any of the pointers +and attach it to the superblock (or whatever), provided it clears the pointer +in the mount context. + +The filesystem is also allowed to allocate resources and pin them with the +mount context. For instance, NFS might pin the appropriate protocol version +module. + + +The Filesystem Context Operations +================================= + +The filesystem context points to a table of operations:: + + struct fs_context_operations { + void (*free)(struct fs_context *fc); + int (*dup)(struct fs_context *fc, struct fs_context *src_fc); + int (*parse_param)(struct fs_context *fc, + struct fs_parameter *param); + int (*parse_monolithic)(struct fs_context *fc, void *data); + int (*get_tree)(struct fs_context *fc); + int (*reconfigure)(struct fs_context *fc); + }; + +These operations are invoked by the various stages of the mount procedure to +manage the filesystem context. They are as follows: + + * :: + + void (*free)(struct fs_context *fc); + + Called to clean up the filesystem-specific part of the filesystem context + when the context is destroyed. 
It should be aware that parts of the + context may have been removed and NULL'd out by ->get_tree(). + + * :: + + int (*dup)(struct fs_context *fc, struct fs_context *src_fc); + + Called when a filesystem context has been duplicated to duplicate the + filesystem-private data. An error may be returned to indicate failure to + do this. + + .. Warning:: + + Note that even if this fails, put_fs_context() will be called + immediately thereafter, so ->dup() *must* make the + filesystem-private data safe for ->free(). + + * :: + + int (*parse_param)(struct fs_context *fc, + struct fs_parameter *param); + + Called when a parameter is being added to the filesystem context. param + points to the key name and maybe a value object. VFS-specific options + will have been weeded out and fc->sb_flags updated in the context. + Security options will also have been weeded out and fc->security updated. + + The parameter can be parsed with fs_parse() and fs_lookup_param(). Note + that the source(s) are presented as parameters named "source". + + If successful, 0 should be returned or a negative error code otherwise. + + * :: + + int (*parse_monolithic)(struct fs_context *fc, void *data); + + Called when the mount(2) system call is invoked to pass the entire data + page in one go. If this is expected to be just a list of "key[=val]" + items separated by commas, then this may be set to NULL. + + The return value is as for ->parse_param(). + + If the filesystem (e.g. NFS) needs to examine the data first and then + finds it's the standard key-val list then it may pass it off to + generic_parse_monolithic(). + + * :: + + int (*get_tree)(struct fs_context *fc); + + Called to get or create the mountable root and superblock, using the + information stored in the filesystem context (reconfiguration goes via a + different vector). It may detach any resources it desires from the + filesystem context and transfer them to the superblock it creates. 
+ + On success it should set fc->root to the mountable root and return 0. In + the case of an error, it should return a negative error code. + + The phase on a userspace-driven context will be set to only allow this to + be called once on any particular context. + + * :: + + int (*reconfigure)(struct fs_context *fc); + + Called to effect reconfiguration of a superblock using information stored + in the filesystem context. It may detach any resources it desires from + the filesystem context and transfer them to the superblock. The + superblock can be found from fc->root->d_sb. + + On success it should return 0. In the case of an error, it should return + a negative error code. + + .. Note:: reconfigure is intended as a replacement for remount_fs. + + +Filesystem context Security +=========================== + +The filesystem context contains a security pointer that the LSMs can use for +building up a security context for the superblock to be mounted. There are a +number of operations used by the new mount code for this purpose: + + * :: + + int security_fs_context_alloc(struct fs_context *fc, + struct dentry *reference); + + Called to initialise fc->security (which is preset to NULL) and allocate + any resources needed. It should return 0 on success or a negative error + code on failure. + + reference will be non-NULL if the context is being created for superblock + reconfiguration (FS_CONTEXT_FOR_RECONFIGURE) in which case it indicates + the root dentry of the superblock to be reconfigured. It will also be + non-NULL in the case of a submount (FS_CONTEXT_FOR_SUBMOUNT) in which case + it indicates the automount point. + + * :: + + int security_fs_context_dup(struct fs_context *fc, + struct fs_context *src_fc); + + Called to initialise fc->security (which is preset to NULL) and allocate + any resources needed. The original filesystem context is pointed to by + src_fc and may be used for reference. It should return 0 on success or a + negative error code on failure. 
+ + * :: + + void security_fs_context_free(struct fs_context *fc); + + Called to clean up anything attached to fc->security. Note that the + contents may have been transferred to a superblock and the pointer cleared + during get_tree. + + * :: + + int security_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param); + + Called for each mount parameter, including the source. The arguments are + as for the ->parse_param() method. It should return 0 to indicate that + the parameter should be passed on to the filesystem, 1 to indicate that + the parameter should be discarded or an error to indicate that the + parameter should be rejected. + + The value pointed to by param may be modified (if a string) or stolen + (provided the value pointer is NULL'd out). If it is stolen, 1 must be + returned to prevent it being passed to the filesystem. + + * :: + + int security_fs_context_validate(struct fs_context *fc); + + Called after all the options have been parsed to validate the collection + as a whole and to do any necessary allocation so that + security_sb_get_tree() and security_sb_reconfigure() are less likely to + fail. It should return 0 or a negative error code. + + In the case of reconfiguration, the target superblock will be accessible + via fc->root. + + * :: + + int security_sb_get_tree(struct fs_context *fc); + + Called during the mount procedure to verify that the specified superblock + is allowed to be mounted and to transfer the security data there. It + should return 0 or a negative error code. + + * :: + + void security_sb_reconfigure(struct fs_context *fc); + + Called to apply any reconfiguration to an LSM's context. It must not + fail. Error checking and resource allocation must be done in advance by + the parameter parsing and validation hooks. 
+ + * :: + + int security_sb_mountpoint(struct fs_context *fc, + struct path *mountpoint, + unsigned int mnt_flags); + + Called during the mount procedure to verify that the root dentry attached + to the context is permitted to be attached to the specified mountpoint. + It should return 0 on success or a negative error code on failure. + + +VFS Filesystem context API +========================== + +There are four operations for creating a filesystem context and one for +destroying a context: + + * :: + + struct fs_context *fs_context_for_mount(struct file_system_type *fs_type, + unsigned int sb_flags); + + Allocate a filesystem context for the purpose of setting up a new mount, + whether that be with a new superblock or sharing an existing one. This + sets the superblock flags, initialises the security and calls + fs_type->init_fs_context() to initialise the filesystem private data. + + fs_type specifies the filesystem type that will manage the context and + sb_flags presets the superblock flags stored therein. + + * :: + + struct fs_context *fs_context_for_reconfigure( + struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask); + + Allocate a filesystem context for the purpose of reconfiguring an + existing superblock. dentry provides a reference to the superblock to be + configured. sb_flags and sb_flags_mask indicate which superblock flags + need changing and to what. + + * :: + + struct fs_context *fs_context_for_submount( + struct file_system_type *fs_type, + struct dentry *reference); + + Allocate a filesystem context for the purpose of creating a new mount for + an automount point or other derived superblock. fs_type specifies the + filesystem type that will manage the context and the reference dentry + supplies the parameters. Namespaces are propagated from the reference + dentry's superblock also. + + Note that it's not a requirement that the reference dentry be of the same + filesystem type as fs_type. 
+ + * :: + + struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc); + + Duplicate a filesystem context, copying any options noted and duplicating + or additionally referencing any resources held therein. This is available + for use where a filesystem has to get a mount within a mount, such as NFS4 + does by internally mounting the root of the target server and then doing a + private pathwalk to the target directory. + + The purpose in the new context is inherited from the old one. + + * :: + + void put_fs_context(struct fs_context *fc); + + Destroy a filesystem context, releasing any resources it holds. This + calls the ->free() operation. This is intended to be called by anyone who + created a filesystem context. + + .. Warning:: + + filesystem contexts are not refcounted, so this causes unconditional + destruction. + +In all the above operations, apart from the put op, the return is a mount +context pointer or a negative error code. + +For the remaining operations, if an error occurs, a negative error code will be +returned. + + * :: + + int vfs_parse_fs_param(struct fs_context *fc, + struct fs_parameter *param); + + Supply a single mount parameter to the filesystem context. This includes + the specification of the source/device which is specified as the "source" + parameter (which may be specified multiple times if the filesystem + supports that). + + param specifies the parameter key name and the value. The parameter is + first checked to see if it corresponds to a standard mount flag (in which + case it is used to set an SB_xxx flag and consumed) or a security option + (in which case the LSM consumes it) before it is passed on to the + filesystem. 
+ + The parameter value is typed and can be one of: + + ==================== ============================= + fs_value_is_flag Parameter not given a value + fs_value_is_string Value is a string + fs_value_is_blob Value is a binary blob + fs_value_is_filename Value is a filename* + dirfd + fs_value_is_file Value is an open file (file*) + ==================== ============================= + + If there is a value, that value is stored in a union in the struct in one + of param->{string,blob,name,file}. Note that the function may steal and + clear the pointer, but then becomes responsible for disposing of the + object. + + * :: + + int vfs_parse_fs_string(struct fs_context *fc, const char *key, + const char *value, size_t v_size); + + A wrapper around vfs_parse_fs_param() that copies the value string it is + passed. + + * :: + + int generic_parse_monolithic(struct fs_context *fc, void *data); + + Parse a sys_mount() data page, assuming the form to be a text list + consisting of key[=val] options separated by commas. Each item in the + list is passed to vfs_mount_option(). This is the default when the + ->parse_monolithic() method is NULL. + + * :: + + int vfs_get_tree(struct fs_context *fc); + + Get or create the mountable root and superblock, using the parameters in + the filesystem context to select/configure the superblock. This invokes + the ->get_tree() method. + + * :: + + struct vfsmount *vfs_create_mount(struct fs_context *fc); + + Create a mount given the parameters in the specified filesystem context. + Note that this does not attach the mount to anything. + + +Superblock Creation Helpers +=========================== + +A number of VFS helpers are available for use by filesystems for the creation +or looking up of superblocks. + + * :: + + struct super_block * + sget_fc(struct fs_context *fc, + int (*test)(struct super_block *sb, struct fs_context *fc), + int (*set)(struct super_block *sb, struct fs_context *fc)); + + This is the core routine. 
If test is non-NULL, it searches for an + existing superblock matching the criteria held in the fs_context, using + the test function to match them. If no match is found, a new superblock + is created and the set function is called to set it up. + + Prior to the set function being called, fc->s_fs_info will be transferred + to sb->s_fs_info - and fc->s_fs_info will be cleared if set returns + success (ie. 0). + +The following helpers all wrap sget_fc(): + + * :: + + int vfs_get_super(struct fs_context *fc, + enum vfs_get_super_keying keying, + int (*fill_super)(struct super_block *sb, + struct fs_context *fc)) + + This creates/looks up a deviceless superblock. The keying indicates how + many superblocks of this type may exist and in what manner they may be + shared: + + (1) vfs_get_single_super + + Only one such superblock may exist in the system. Any further + attempt to get a new superblock gets this one (and any parameter + differences are ignored). + + (2) vfs_get_keyed_super + + Multiple superblocks of this type may exist and they're keyed on + their s_fs_info pointer (for example this may refer to a + namespace). + + (3) vfs_get_independent_super + + Multiple independent superblocks of this type may exist. This + function never matches an existing one and always creates a new + one. + + +===================== +PARAMETER DESCRIPTION +===================== + +Parameters are described using structures defined in linux/fs_parser.h. 
+There's a core description struct that links everything together:: + + struct fs_parameter_description { + const struct fs_parameter_spec *specs; + const struct fs_parameter_enum *enums; + }; + +For example:: + + enum { + Opt_autocell, + Opt_bar, + Opt_dyn, + Opt_foo, + Opt_source, + }; + + static const struct fs_parameter_description afs_fs_parameters = { + .specs = afs_param_specs, + .enums = afs_param_enums, + }; + +The members are as follows: + + (1) :: + + const struct fs_parameter_specification *specs; + + Table of parameter specifications, terminated with a null entry, where the + entries are of type:: + + struct fs_parameter_spec { + const char *name; + u8 opt; + enum fs_parameter_type type:8; + unsigned short flags; + }; + + The 'name' field is a string to match exactly to the parameter key (no + wildcards, patterns and no case-independence) and 'opt' is the value that + will be returned by the fs_parser() function in the case of a successful + match. + + The 'type' field indicates the desired value type and must be one of: + + ======================= ======================= ===================== + TYPE NAME EXPECTED VALUE RESULT IN + ======================= ======================= ===================== + fs_param_is_flag No value n/a + fs_param_is_bool Boolean value result->boolean + fs_param_is_u32 32-bit unsigned int result->uint_32 + fs_param_is_u32_octal 32-bit octal int result->uint_32 + fs_param_is_u32_hex 32-bit hex int result->uint_32 + fs_param_is_s32 32-bit signed int result->int_32 + fs_param_is_u64 64-bit unsigned int result->uint_64 + fs_param_is_enum Enum value name result->uint_32 + fs_param_is_string Arbitrary string param->string + fs_param_is_blob Binary blob param->blob + fs_param_is_blockdev Blockdev path * Needs lookup + fs_param_is_path Path * Needs lookup + fs_param_is_fd File descriptor result->int_32 + ======================= ======================= ===================== + + Note that if the value is of fs_param_is_bool type, 
fs_parse() will try + to match any string value against "0", "1", "no", "yes", "false", "true". + + Each parameter can also be qualified with 'flags': + + ======================= ================================================ + fs_param_v_optional The value is optional + fs_param_neg_with_no result->negated set if key is prefixed with "no" + fs_param_neg_with_empty result->negated set if value is "" + fs_param_deprecated The parameter is deprecated. + ======================= ================================================ + + These are wrapped with a number of convenience wrappers: + + ======================= =============================================== + MACRO SPECIFIES + ======================= =============================================== + fsparam_flag() fs_param_is_flag + fsparam_flag_no() fs_param_is_flag, fs_param_neg_with_no + fsparam_bool() fs_param_is_bool + fsparam_u32() fs_param_is_u32 + fsparam_u32oct() fs_param_is_u32_octal + fsparam_u32hex() fs_param_is_u32_hex + fsparam_s32() fs_param_is_s32 + fsparam_u64() fs_param_is_u64 + fsparam_enum() fs_param_is_enum + fsparam_string() fs_param_is_string + fsparam_blob() fs_param_is_blob + fsparam_bdev() fs_param_is_blockdev + fsparam_path() fs_param_is_path + fsparam_fd() fs_param_is_fd + ======================= =============================================== + + all of which take two arguments, name string and option number - for + example:: + + static const struct fs_parameter_spec afs_param_specs[] = { + fsparam_flag ("autocell", Opt_autocell), + fsparam_flag ("dyn", Opt_dyn), + fsparam_string ("source", Opt_source), + fsparam_flag_no ("foo", Opt_foo), + {} + }; + + An additional macro, __fsparam() is provided that takes an additional pair + of arguments to specify the type and the flags for anything that doesn't + match one of the above macros. + + (2) :: + + const struct fs_parameter_enum *enums; + + Table of enum value names to integer mappings, terminated with a null + entry. 
This is of type:: + + struct fs_parameter_enum { + u8 opt; + char name[14]; + u8 value; + }; + + Where the array is an unsorted list of { parameter ID, name }-keyed + elements that indicate the value to map to, e.g.:: + + static const struct fs_parameter_enum afs_param_enums[] = { + { Opt_bar, "x", 1}, + { Opt_bar, "y", 23}, + { Opt_bar, "z", 42}, + }; + + If a parameter of type fs_param_is_enum is encountered, fs_parse() will + try to look the value up in the enum table and the result will be stored + in the parse result. + +The parser should be pointed to by the parser pointer in the file_system_type +struct as this will provide validation on registration (if +CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from +userspace using the fsinfo() syscall. + + +Parameter Helper Functions +========================== + +A number of helper functions are provided to help a filesystem or an LSM +process the parameters it is given. + + * :: + + int lookup_constant(const struct constant_table tbl[], + const char *name, int not_found); + + Look up a constant by name in a table of name -> integer mappings. The + table is an array of elements of the following type:: + + struct constant_table { + const char *name; + int value; + }; + + If a match is found, the corresponding value is returned. If a match + isn't found, the not_found value is returned instead. + + * :: + + bool validate_constant_table(const struct constant_table *tbl, + size_t tbl_size, + int low, int high, int special); + + Validate a constant table. Checks that all the elements are appropriately + ordered, that there are no duplicates and that the values are between low + and high inclusive, though provision is made for one allowable special + value outside of that range. If no special value is required, special + should just be set to lie inside the low-to-high range. + + If all is good, true is returned. If the table is invalid, errors are + logged to dmesg and false is returned. 
+ + * :: + + bool fs_validate_description(const struct fs_parameter_description *desc); + + This performs some validation checks on a parameter description. It + returns true if the description is good and false if it is not. It will + log errors to dmesg if validation fails. + + * :: + + int fs_parse(struct fs_context *fc, + const struct fs_parameter_description *desc, + struct fs_parameter *param, + struct fs_parse_result *result); + + This is the main interpreter of parameters. It uses the parameter + description to look up a parameter by key name and to convert that to an + option number (which it returns). + + If successful, and if the parameter type indicates the result is a + boolean, integer or enum type, the value is converted by this function and + the result stored in result->{boolean,int_32,uint_32,uint_64}. + + If a match isn't initially made, the key is prefixed with "no" and no + value is present then an attempt will be made to look up the key with the + prefix removed. If this matches a parameter for which the type has flag + fs_param_neg_with_no set, then a match will be made and result->negated + will be set to true. + + If the parameter isn't matched, -ENOPARAM will be returned; if the + parameter is matched, but the value is erroneous, -EINVAL will be + returned; otherwise the parameter's option number will be returned. + + * :: + + int fs_lookup_param(struct fs_context *fc, + struct fs_parameter *value, + bool want_bdev, + struct path *_path); + + This takes a parameter that carries a string or filename type and attempts + to do a path lookup on it. If the parameter expects a blockdev, a check + is made that the inode actually represents one. + + Returns 0 if successful and ``*_path`` will be set; returns a negative + error code if not. 
diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.txt deleted file mode 100644 index 87c14bbb2b35..000000000000 --- a/Documentation/filesystems/mount_api.txt +++ /dev/null @@ -1,724 +0,0 @@ - ==================== - FILESYSTEM MOUNT API - ==================== - -CONTENTS - - (1) Overview. - - (2) The filesystem context. - - (3) The filesystem context operations. - - (4) Filesystem context security. - - (5) VFS filesystem context API. - - (6) Superblock creation helpers. - - (7) Parameter description. - - (8) Parameter helper functions. - - -======== -OVERVIEW -======== - -The creation of new mounts is now to be done in a multistep process: - - (1) Create a filesystem context. - - (2) Parse the parameters and attach them to the context. Parameters are - expected to be passed individually from userspace, though legacy binary - parameters can also be handled. - - (3) Validate and pre-process the context. - - (4) Get or create a superblock and mountable root. - - (5) Perform the mount. - - (6) Return an error message attached to the context. - - (7) Destroy the context. - -To support this, the file_system_type struct gains two new fields: - - int (*init_fs_context)(struct fs_context *fc); - const struct fs_parameter_description *parameters; - -The first is invoked to set up the filesystem-specific parts of a filesystem -context, including the additional space, and the second points to the -parameter description for validation at registration time and querying by a -future system call. - -Note that security initialisation is done *after* the filesystem is called so -that the namespaces may be adjusted first. - - -====================== -THE FILESYSTEM CONTEXT -====================== - -The creation and reconfiguration of a superblock is governed by a filesystem -context. 
This is represented by the fs_context structure: - - struct fs_context { - const struct fs_context_operations *ops; - struct file_system_type *fs_type; - void *fs_private; - struct dentry *root; - struct user_namespace *user_ns; - struct net *net_ns; - const struct cred *cred; - char *source; - char *subtype; - void *security; - void *s_fs_info; - unsigned int sb_flags; - unsigned int sb_flags_mask; - unsigned int s_iflags; - unsigned int lsm_flags; - enum fs_context_purpose purpose:8; - ... - }; - -The fs_context fields are as follows: - - (*) const struct fs_context_operations *ops - - These are operations that can be done on a filesystem context (see - below). This must be set by the ->init_fs_context() file_system_type - operation. - - (*) struct file_system_type *fs_type - - A pointer to the file_system_type of the filesystem that is being - constructed or reconfigured. This retains a reference on the type owner. - - (*) void *fs_private - - A pointer to the file system's private data. This is where the filesystem - will need to store any options it parses. - - (*) struct dentry *root - - A pointer to the root of the mountable tree (and indirectly, the - superblock thereof). This is filled in by the ->get_tree() op. If this - is set, an active reference on root->d_sb must also be held. - - (*) struct user_namespace *user_ns - (*) struct net *net_ns - - There are a subset of the namespaces in use by the invoking process. They - retain references on each namespace. The subscribed namespaces may be - replaced by the filesystem to reflect other sources, such as the parent - mount superblock on an automount. - - (*) const struct cred *cred - - The mounter's credentials. This retains a reference on the credentials. - - (*) char *source - - This specifies the source. It may be a block device (e.g. /dev/sda1) or - something more exotic, such as the "host:/path" that NFS desires. 
- - (*) char *subtype - - This is a string to be added to the type displayed in /proc/mounts to - qualify it (used by FUSE). This is available for the filesystem to set if - desired. - - (*) void *security - - A place for the LSMs to hang their security data for the superblock. The - relevant security operations are described below. - - (*) void *s_fs_info - - The proposed s_fs_info for a new superblock, set in the superblock by - sget_fc(). This can be used to distinguish superblocks. - - (*) unsigned int sb_flags - (*) unsigned int sb_flags_mask - - Which bits SB_* flags are to be set/cleared in super_block::s_flags. - - (*) unsigned int s_iflags - - These will be bitwise-OR'd with s->s_iflags when a superblock is created. - - (*) enum fs_context_purpose - - This indicates the purpose for which the context is intended. The - available values are: - - FS_CONTEXT_FOR_MOUNT, -- New superblock for explicit mount - FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount - FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount - -The mount context is created by calling vfs_new_fs_context() or -vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the -structure is not refcounted. - -VFS, security and filesystem mount options are set individually with -vfs_parse_mount_option(). Options provided by the old mount(2) system call as -a page of data can be parsed with generic_parse_monolithic(). - -When mounting, the filesystem is allowed to take data from any of the pointers -and attach it to the superblock (or whatever), provided it clears the pointer -in the mount context. - -The filesystem is also allowed to allocate resources and pin them with the -mount context. For instance, NFS might pin the appropriate protocol version -module. 
- - -================================= -THE FILESYSTEM CONTEXT OPERATIONS -================================= - -The filesystem context points to a table of operations: - - struct fs_context_operations { - void (*free)(struct fs_context *fc); - int (*dup)(struct fs_context *fc, struct fs_context *src_fc); - int (*parse_param)(struct fs_context *fc, - struct struct fs_parameter *param); - int (*parse_monolithic)(struct fs_context *fc, void *data); - int (*get_tree)(struct fs_context *fc); - int (*reconfigure)(struct fs_context *fc); - }; - -These operations are invoked by the various stages of the mount procedure to -manage the filesystem context. They are as follows: - - (*) void (*free)(struct fs_context *fc); - - Called to clean up the filesystem-specific part of the filesystem context - when the context is destroyed. It should be aware that parts of the - context may have been removed and NULL'd out by ->get_tree(). - - (*) int (*dup)(struct fs_context *fc, struct fs_context *src_fc); - - Called when a filesystem context has been duplicated to duplicate the - filesystem-private data. An error may be returned to indicate failure to - do this. - - [!] Note that even if this fails, put_fs_context() will be called - immediately thereafter, so ->dup() *must* make the - filesystem-private data safe for ->free(). - - (*) int (*parse_param)(struct fs_context *fc, - struct struct fs_parameter *param); - - Called when a parameter is being added to the filesystem context. param - points to the key name and maybe a value object. VFS-specific options - will have been weeded out and fc->sb_flags updated in the context. - Security options will also have been weeded out and fc->security updated. - - The parameter can be parsed with fs_parse() and fs_lookup_param(). Note - that the source(s) are presented as parameters named "source". - - If successful, 0 should be returned or a negative error code otherwise. 
- - (*) int (*parse_monolithic)(struct fs_context *fc, void *data); - - Called when the mount(2) system call is invoked to pass the entire data - page in one go. If this is expected to be just a list of "key[=val]" - items separated by commas, then this may be set to NULL. - - The return value is as for ->parse_param(). - - If the filesystem (e.g. NFS) needs to examine the data first and then - finds it's the standard key-val list then it may pass it off to - generic_parse_monolithic(). - - (*) int (*get_tree)(struct fs_context *fc); - - Called to get or create the mountable root and superblock, using the - information stored in the filesystem context (reconfiguration goes via a - different vector). It may detach any resources it desires from the - filesystem context and transfer them to the superblock it creates. - - On success it should set fc->root to the mountable root and return 0. In - the case of an error, it should return a negative error code. - - The phase on a userspace-driven context will be set to only allow this to - be called once on any particular context. - - (*) int (*reconfigure)(struct fs_context *fc); - - Called to effect reconfiguration of a superblock using information stored - in the filesystem context. It may detach any resources it desires from - the filesystem context and transfer them to the superblock. The - superblock can be found from fc->root->d_sb. - - On success it should return 0. In the case of an error, it should return - a negative error code. - - [NOTE] reconfigure is intended as a replacement for remount_fs. - - -=========================== -FILESYSTEM CONTEXT SECURITY -=========================== - -The filesystem context contains a security pointer that the LSMs can use for -building up a security context for the superblock to be mounted. 
There are a -number of operations used by the new mount code for this purpose: - - (*) int security_fs_context_alloc(struct fs_context *fc, - struct dentry *reference); - - Called to initialise fc->security (which is preset to NULL) and allocate - any resources needed. It should return 0 on success or a negative error - code on failure. - - reference will be non-NULL if the context is being created for superblock - reconfiguration (FS_CONTEXT_FOR_RECONFIGURE) in which case it indicates - the root dentry of the superblock to be reconfigured. It will also be - non-NULL in the case of a submount (FS_CONTEXT_FOR_SUBMOUNT) in which case - it indicates the automount point. - - (*) int security_fs_context_dup(struct fs_context *fc, - struct fs_context *src_fc); - - Called to initialise fc->security (which is preset to NULL) and allocate - any resources needed. The original filesystem context is pointed to by - src_fc and may be used for reference. It should return 0 on success or a - negative error code on failure. - - (*) void security_fs_context_free(struct fs_context *fc); - - Called to clean up anything attached to fc->security. Note that the - contents may have been transferred to a superblock and the pointer cleared - during get_tree. - - (*) int security_fs_context_parse_param(struct fs_context *fc, - struct fs_parameter *param); - - Called for each mount parameter, including the source. The arguments are - as for the ->parse_param() method. It should return 0 to indicate that - the parameter should be passed on to the filesystem, 1 to indicate that - the parameter should be discarded or an error to indicate that the - parameter should be rejected. - - The value pointed to by param may be modified (if a string) or stolen - (provided the value pointer is NULL'd out). If it is stolen, 1 must be - returned to prevent it being passed to the filesystem. 
- - (*) int security_fs_context_validate(struct fs_context *fc); - - Called after all the options have been parsed to validate the collection - as a whole and to do any necessary allocation so that - security_sb_get_tree() and security_sb_reconfigure() are less likely to - fail. It should return 0 or a negative error code. - - In the case of reconfiguration, the target superblock will be accessible - via fc->root. - - (*) int security_sb_get_tree(struct fs_context *fc); - - Called during the mount procedure to verify that the specified superblock - is allowed to be mounted and to transfer the security data there. It - should return 0 or a negative error code. - - (*) void security_sb_reconfigure(struct fs_context *fc); - - Called to apply any reconfiguration to an LSM's context. It must not - fail. Error checking and resource allocation must be done in advance by - the parameter parsing and validation hooks. - - (*) int security_sb_mountpoint(struct fs_context *fc, struct path *mountpoint, - unsigned int mnt_flags); - - Called during the mount procedure to verify that the root dentry attached - to the context is permitted to be attached to the specified mountpoint. - It should return 0 on success or a negative error code on failure. - - -========================== -VFS FILESYSTEM CONTEXT API -========================== - -There are four operations for creating a filesystem context and one for -destroying a context: - - (*) struct fs_context *fs_context_for_mount( - struct file_system_type *fs_type, - unsigned int sb_flags); - - Allocate a filesystem context for the purpose of setting up a new mount, - whether that be with a new superblock or sharing an existing one. This - sets the superblock flags, initialises the security and calls - fs_type->init_fs_context() to initialise the filesystem private data. - - fs_type specifies the filesystem type that will manage the context and - sb_flags presets the superblock flags stored therein. 
- - (*) struct fs_context *fs_context_for_reconfigure( - struct dentry *dentry, - unsigned int sb_flags, - unsigned int sb_flags_mask); - - Allocate a filesystem context for the purpose of reconfiguring an - existing superblock. dentry provides a reference to the superblock to be - configured. sb_flags and sb_flags_mask indicate which superblock flags - need changing and to what. - - (*) struct fs_context *fs_context_for_submount( - struct file_system_type *fs_type, - struct dentry *reference); - - Allocate a filesystem context for the purpose of creating a new mount for - an automount point or other derived superblock. fs_type specifies the - filesystem type that will manage the context and the reference dentry - supplies the parameters. Namespaces are propagated from the reference - dentry's superblock also. - - Note that it's not a requirement that the reference dentry be of the same - filesystem type as fs_type. - - (*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc); - - Duplicate a filesystem context, copying any options noted and duplicating - or additionally referencing any resources held therein. This is available - for use where a filesystem has to get a mount within a mount, such as NFS4 - does by internally mounting the root of the target server and then doing a - private pathwalk to the target directory. - - The purpose in the new context is inherited from the old one. - - (*) void put_fs_context(struct fs_context *fc); - - Destroy a filesystem context, releasing any resources it holds. This - calls the ->free() operation. This is intended to be called by anyone who - created a filesystem context. - - [!] filesystem contexts are not refcounted, so this causes unconditional - destruction. - -In all the above operations, apart from the put op, the return is a mount -context pointer or a negative error code. - -For the remaining operations, if an error occurs, a negative error code will be -returned. 
- - (*) int vfs_parse_fs_param(struct fs_context *fc, - struct fs_parameter *param); - - Supply a single mount parameter to the filesystem context. This include - the specification of the source/device which is specified as the "source" - parameter (which may be specified multiple times if the filesystem - supports that). - - param specifies the parameter key name and the value. The parameter is - first checked to see if it corresponds to a standard mount flag (in which - case it is used to set an SB_xxx flag and consumed) or a security option - (in which case the LSM consumes it) before it is passed on to the - filesystem. - - The parameter value is typed and can be one of: - - fs_value_is_flag, Parameter not given a value. - fs_value_is_string, Value is a string - fs_value_is_blob, Value is a binary blob - fs_value_is_filename, Value is a filename* + dirfd - fs_value_is_file, Value is an open file (file*) - - If there is a value, that value is stored in a union in the struct in one - of param->{string,blob,name,file}. Note that the function may steal and - clear the pointer, but then becomes responsible for disposing of the - object. - - (*) int vfs_parse_fs_string(struct fs_context *fc, const char *key, - const char *value, size_t v_size); - - A wrapper around vfs_parse_fs_param() that copies the value string it is - passed. - - (*) int generic_parse_monolithic(struct fs_context *fc, void *data); - - Parse a sys_mount() data page, assuming the form to be a text list - consisting of key[=val] options separated by commas. Each item in the - list is passed to vfs_mount_option(). This is the default when the - ->parse_monolithic() method is NULL. - - (*) int vfs_get_tree(struct fs_context *fc); - - Get or create the mountable root and superblock, using the parameters in - the filesystem context to select/configure the superblock. This invokes - the ->get_tree() method. 
- - (*) struct vfsmount *vfs_create_mount(struct fs_context *fc); - - Create a mount given the parameters in the specified filesystem context. - Note that this does not attach the mount to anything. - - -=========================== -SUPERBLOCK CREATION HELPERS -=========================== - -A number of VFS helpers are available for use by filesystems for the creation -or looking up of superblocks. - - (*) struct super_block * - sget_fc(struct fs_context *fc, - int (*test)(struct super_block *sb, struct fs_context *fc), - int (*set)(struct super_block *sb, struct fs_context *fc)); - - This is the core routine. If test is non-NULL, it searches for an - existing superblock matching the criteria held in the fs_context, using - the test function to match them. If no match is found, a new superblock - is created and the set function is called to set it up. - - Prior to the set function being called, fc->s_fs_info will be transferred - to sb->s_fs_info - and fc->s_fs_info will be cleared if set returns - success (ie. 0). - -The following helpers all wrap sget_fc(): - - (*) int vfs_get_super(struct fs_context *fc, - enum vfs_get_super_keying keying, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)) - - This creates/looks up a deviceless superblock. The keying indicates how - many superblocks of this type may exist and in what manner they may be - shared: - - (1) vfs_get_single_super - - Only one such superblock may exist in the system. Any further - attempt to get a new superblock gets this one (and any parameter - differences are ignored). - - (2) vfs_get_keyed_super - - Multiple superblocks of this type may exist and they're keyed on - their s_fs_info pointer (for example this may refer to a - namespace). - - (3) vfs_get_independent_super - - Multiple independent superblocks of this type may exist. This - function never matches an existing one and always creates a new - one. 
- - -===================== -PARAMETER DESCRIPTION -===================== - -Parameters are described using structures defined in linux/fs_parser.h. -There's a core description struct that links everything together: - - struct fs_parameter_description { - const struct fs_parameter_spec *specs; - const struct fs_parameter_enum *enums; - }; - -For example: - - enum { - Opt_autocell, - Opt_bar, - Opt_dyn, - Opt_foo, - Opt_source, - }; - - static const struct fs_parameter_description afs_fs_parameters = { - .specs = afs_param_specs, - .enums = afs_param_enums, - }; - -The members are as follows: - - (1) const struct fs_parameter_specification *specs; - - Table of parameter specifications, terminated with a null entry, where the - entries are of type: - - struct fs_parameter_spec { - const char *name; - u8 opt; - enum fs_parameter_type type:8; - unsigned short flags; - }; - - The 'name' field is a string to match exactly to the parameter key (no - wildcards, patterns and no case-independence) and 'opt' is the value that - will be returned by the fs_parser() function in the case of a successful - match. 
- - The 'type' field indicates the desired value type and must be one of: - - TYPE NAME EXPECTED VALUE RESULT IN - ======================= ======================= ===================== - fs_param_is_flag No value n/a - fs_param_is_bool Boolean value result->boolean - fs_param_is_u32 32-bit unsigned int result->uint_32 - fs_param_is_u32_octal 32-bit octal int result->uint_32 - fs_param_is_u32_hex 32-bit hex int result->uint_32 - fs_param_is_s32 32-bit signed int result->int_32 - fs_param_is_u64 64-bit unsigned int result->uint_64 - fs_param_is_enum Enum value name result->uint_32 - fs_param_is_string Arbitrary string param->string - fs_param_is_blob Binary blob param->blob - fs_param_is_blockdev Blockdev path * Needs lookup - fs_param_is_path Path * Needs lookup - fs_param_is_fd File descriptor result->int_32 - - Note that if the value is of fs_param_is_bool type, fs_parse() will try - to match any string value against "0", "1", "no", "yes", "false", "true". - - Each parameter can also be qualified with 'flags': - - fs_param_v_optional The value is optional - fs_param_neg_with_no result->negated set if key is prefixed with "no" - fs_param_neg_with_empty result->negated set if value is "" - fs_param_deprecated The parameter is deprecated. 
- - These are wrapped with a number of convenience wrappers: - - MACRO SPECIFIES - ======================= =============================================== - fsparam_flag() fs_param_is_flag - fsparam_flag_no() fs_param_is_flag, fs_param_neg_with_no - fsparam_bool() fs_param_is_bool - fsparam_u32() fs_param_is_u32 - fsparam_u32oct() fs_param_is_u32_octal - fsparam_u32hex() fs_param_is_u32_hex - fsparam_s32() fs_param_is_s32 - fsparam_u64() fs_param_is_u64 - fsparam_enum() fs_param_is_enum - fsparam_string() fs_param_is_string - fsparam_blob() fs_param_is_blob - fsparam_bdev() fs_param_is_blockdev - fsparam_path() fs_param_is_path - fsparam_fd() fs_param_is_fd - - all of which take two arguments, name string and option number - for - example: - - static const struct fs_parameter_spec afs_param_specs[] = { - fsparam_flag ("autocell", Opt_autocell), - fsparam_flag ("dyn", Opt_dyn), - fsparam_string ("source", Opt_source), - fsparam_flag_no ("foo", Opt_foo), - {} - }; - - An addition macro, __fsparam() is provided that takes an additional pair - of arguments to specify the type and the flags for anything that doesn't - match one of the above macros. - - (2) const struct fs_parameter_enum *enums; - - Table of enum value names to integer mappings, terminated with a null - entry. This is of type: - - struct fs_parameter_enum { - u8 opt; - char name[14]; - u8 value; - }; - - Where the array is an unsorted list of { parameter ID, name }-keyed - elements that indicate the value to map to, e.g.: - - static const struct fs_parameter_enum afs_param_enums[] = { - { Opt_bar, "x", 1}, - { Opt_bar, "y", 23}, - { Opt_bar, "z", 42}, - }; - - If a parameter of type fs_param_is_enum is encountered, fs_parse() will - try to look the value up in the enum table and the result will be stored - in the parse result. 
- -The parser should be pointed to by the parser pointer in the file_system_type -struct as this will provide validation on registration (if -CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from -userspace using the fsinfo() syscall. - - -========================== -PARAMETER HELPER FUNCTIONS -========================== - -A number of helper functions are provided to help a filesystem or an LSM -process the parameters it is given. - - (*) int lookup_constant(const struct constant_table tbl[], - const char *name, int not_found); - - Look up a constant by name in a table of name -> integer mappings. The - table is an array of elements of the following type: - - struct constant_table { - const char *name; - int value; - }; - - If a match is found, the corresponding value is returned. If a match - isn't found, the not_found value is returned instead. - - (*) bool validate_constant_table(const struct constant_table *tbl, - size_t tbl_size, - int low, int high, int special); - - Validate a constant table. Checks that all the elements are appropriately - ordered, that there are no duplicates and that the values are between low - and high inclusive, though provision is made for one allowable special - value outside of that range. If no special value is required, special - should just be set to lie inside the low-to-high range. - - If all is good, true is returned. If the table is invalid, errors are - logged to dmesg and false is returned. - - (*) bool fs_validate_description(const struct fs_parameter_description *desc); - - This performs some validation checks on a parameter description. It - returns true if the description is good and false if it is not. It will - log errors to dmesg if validation fails. - - (*) int fs_parse(struct fs_context *fc, - const struct fs_parameter_description *desc, - struct fs_parameter *param, - struct fs_parse_result *result); - - This is the main interpreter of parameters. 
It uses the parameter - description to look up a parameter by key name and to convert that to an - option number (which it returns). - - If successful, and if the parameter type indicates the result is a - boolean, integer or enum type, the value is converted by this function and - the result stored in result->{boolean,int_32,uint_32,uint_64}. - - If a match isn't initially made, the key is prefixed with "no" and no - value is present then an attempt will be made to look up the key with the - prefix removed. If this matches a parameter for which the type has flag - fs_param_neg_with_no set, then a match will be made and result->negated - will be set to true. - - If the parameter isn't matched, -ENOPARAM will be returned; if the - parameter is matched, but the value is erroneous, -EINVAL will be - returned; otherwise the parameter's option number will be returned. - - (*) int fs_lookup_param(struct fs_context *fc, - struct fs_parameter *value, - bool want_bdev, - struct path *_path); - - This takes a parameter that carries a string or filename type and attempts - to do a path lookup on it. If the parameter expects a blockdev, a check - is made that the inode actually represents one. - - Returns 0 if successful and *_path will be set; returns a negative error - code if not. diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index e6c3e4c61dad..5f24fcbfbfb4 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -85,7 +85,7 @@ struct p_log { * Superblock creation fills in ->root whereas reconfiguration begins with this * already set. * - * See Documentation/filesystems/mount_api.txt + * See Documentation/filesystems/mount_api.rst */ struct fs_context { const struct fs_context_operations *ops; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 988ca0df7824..44d5422c18e4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -77,7 +77,7 @@ * state. 
This is called immediately after commit_creds(). * * Security hooks for mount using fs_context. - * [See also Documentation/filesystems/mount_api.txt] + * [See also Documentation/filesystems/mount_api.rst] * * @fs_context_dup: * Allocate and attach a security structure to sc->security. This pointer -- cgit v1.2.3 From 9b6f151e768682206673de065eaca401e2baaf11 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:10 +0200 Subject: docs: filesystems: convert quota.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jan Kara Link: https://lore.kernel.org/r/10a707377475bb252f454af2b8f58a038527933f.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/quota.rst | 85 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/quota.txt | 68 ----------------------------- MAINTAINERS | 2 +- 4 files changed, 87 insertions(+), 69 deletions(-) create mode 100644 Documentation/filesystems/quota.rst delete mode 100644 Documentation/filesystems/quota.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 4532b4d2631f..e2bbccf6d5bc 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -31,6 +31,7 @@ algorithms work. locks mandatory-locking mount_api + quota automount-support diff --git a/Documentation/filesystems/quota.rst b/Documentation/filesystems/quota.rst new file mode 100644 index 000000000000..a30cdd47c652 --- /dev/null +++ b/Documentation/filesystems/quota.rst @@ -0,0 +1,85 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +=============== +Quota subsystem +=============== + +Quota subsystem allows system administrator to set limits on used space and +number of used inodes (inode is a filesystem structure which is associated with +each file or directory) for users and/or groups. For both used space and number +of used inodes there are actually two limits. The first one is called softlimit +and the second one hardlimit. A user can never exceed a hardlimit for any +resource (unless he has CAP_SYS_RESOURCE capability). User is allowed to exceed +softlimit but only for limited period of time. This period is called "grace +period" or "grace time". When grace time is over, user is not able to allocate +more space/inodes until he frees enough of them to get below softlimit. + +Quota limits (and amount of grace time) are set independently for each +filesystem. + +For more details about quota design, see the documentation in quota-tools package +(http://sourceforge.net/projects/linuxquota). + +Quota netlink interface +======================= +When user exceeds a softlimit, runs out of grace time or reaches hardlimit, +quota subsystem traditionally printed a message to the controlling terminal of +the process which caused the excess. This method has the disadvantage that +when user is using a graphical desktop he usually cannot see the message. +Thus quota netlink interface has been designed to pass information about +the above events to userspace. There they can be captured by an application +and processed accordingly. + +The interface uses generic netlink framework (see +http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more +details about this layer). The name of the quota generic netlink interface +is "VFS_DQUOT". Definitions of constants below are in <uapi/linux/quota.h>. +Since the quota netlink protocol is not namespace aware, quota netlink messages +are sent only in initial network namespace. 
+ +Currently, the interface supports only one message type QUOTA_NL_C_WARNING. +This command is used to send a notification about any of the above mentioned +events. Each message has six attributes. These are (type of the argument is +in parentheses): + + QUOTA_NL_A_QTYPE (u32) + - type of quota being exceeded (one of USRQUOTA, GRPQUOTA) + QUOTA_NL_A_EXCESS_ID (u64) + - UID/GID (depends on quota type) of user / group whose limit + is being exceeded. + QUOTA_NL_A_CAUSED_ID (u64) + - UID of a user who caused the event + QUOTA_NL_A_WARNING (u32) + - what kind of limit is exceeded: + + QUOTA_NL_IHARDWARN + inode hardlimit + QUOTA_NL_ISOFTLONGWARN + inode softlimit is exceeded longer + than given grace period + QUOTA_NL_ISOFTWARN + inode softlimit + QUOTA_NL_BHARDWARN + space (block) hardlimit + QUOTA_NL_BSOFTLONGWARN + space (block) softlimit is exceeded + longer than given grace period. + QUOTA_NL_BSOFTWARN + space (block) softlimit + + - four warnings are also defined for the event when user stops + exceeding some limit: + + QUOTA_NL_IHARDBELOW + inode hardlimit + QUOTA_NL_ISOFTBELOW + inode softlimit + QUOTA_NL_BHARDBELOW + space (block) hardlimit + QUOTA_NL_BSOFTBELOW + space (block) softlimit + + QUOTA_NL_A_DEV_MAJOR (u32) + - major number of a device with the affected filesystem + QUOTA_NL_A_DEV_MINOR (u32) + - minor number of a device with the affected filesystem diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt deleted file mode 100644 index 32874b06ebe9..000000000000 --- a/Documentation/filesystems/quota.txt +++ /dev/null @@ -1,68 +0,0 @@ - -Quota subsystem -=============== - -Quota subsystem allows system administrator to set limits on used space and -number of used inodes (inode is a filesystem structure which is associated with -each file or directory) for users and/or groups. For both used space and number -of used inodes there are actually two limits. The first one is called softlimit -and the second one hardlimit. 
A user can never exceed a hardlimit for any -resource (unless he has CAP_SYS_RESOURCE capability). User is allowed to exceed -softlimit but only for limited period of time. This period is called "grace -period" or "grace time". When grace time is over, user is not able to allocate -more space/inodes until he frees enough of them to get below softlimit. - -Quota limits (and amount of grace time) are set independently for each -filesystem. - -For more details about quota design, see the documentation in quota-tools package -(http://sourceforge.net/projects/linuxquota). - -Quota netlink interface -======================= -When user exceeds a softlimit, runs out of grace time or reaches hardlimit, -quota subsystem traditionally printed a message to the controlling terminal of -the process which caused the excess. This method has the disadvantage that -when user is using a graphical desktop he usually cannot see the message. -Thus quota netlink interface has been designed to pass information about -the above events to userspace. There they can be captured by an application -and processed accordingly. - -The interface uses generic netlink framework (see -http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more -details about this layer). The name of the quota generic netlink interface -is "VFS_DQUOT". Definitions of constants below are in <uapi/linux/quota.h>. -Since the quota netlink protocol is not namespace aware, quota netlink messages -are sent only in initial network namespace. - -Currently, the interface supports only one message type QUOTA_NL_C_WARNING. -This command is used to send a notification about any of the above mentioned -events. Each message has six attributes. These are (type of the argument is -in parentheses): - QUOTA_NL_A_QTYPE (u32) - - type of quota being exceeded (one of USRQUOTA, GRPQUOTA) - QUOTA_NL_A_EXCESS_ID (u64) - - UID/GID (depends on quota type) of user / group whose limit - is being exceeded. 
- QUOTA_NL_A_CAUSED_ID (u64) - - UID of a user who caused the event - QUOTA_NL_A_WARNING (u32) - - what kind of limit is exceeded: - QUOTA_NL_IHARDWARN - inode hardlimit - QUOTA_NL_ISOFTLONGWARN - inode softlimit is exceeded longer - than given grace period - QUOTA_NL_ISOFTWARN - inode softlimit - QUOTA_NL_BHARDWARN - space (block) hardlimit - QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded - longer than given grace period. - QUOTA_NL_BSOFTWARN - space (block) softlimit - - four warnings are also defined for the event when user stops - exceeding some limit: - QUOTA_NL_IHARDBELOW - inode hardlimit - QUOTA_NL_ISOFTBELOW - inode softlimit - QUOTA_NL_BHARDBELOW - space (block) hardlimit - QUOTA_NL_BSOFTBELOW - space (block) softlimit - QUOTA_NL_A_DEV_MAJOR (u32) - - major number of a device with the affected filesystem - QUOTA_NL_A_DEV_MINOR (u32) - - minor number of a device with the affected filesystem diff --git a/MAINTAINERS b/MAINTAINERS index 46ba68a07a8f..1c3bd61ad03e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5010,7 +5010,7 @@ W: http://www.win.tue.nl/~aeb/partitions/partition_types-1.html DISKQUOTA M: Jan Kara S: Maintained -F: Documentation/filesystems/quota.txt +F: Documentation/filesystems/quota.rst F: fs/quota/ F: include/linux/quota*.h F: include/uapi/linux/quota*.h -- cgit v1.2.3 From 53a41d3eec863f12fead722f774fc36bd697f550 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:11 +0200 Subject: docs: filesystems: convert seq_file.txt to ReST - Add a SPDX header; - Add a document title; - Adjust section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f950a0a56178ee05872ae2a2711a04d7af8ebb24.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/seq_file.rst | 372 +++++++++++++++++++++++++++++++++ Documentation/filesystems/seq_file.txt | 359 ------------------------------- 3 files changed, 373 insertions(+), 359 deletions(-) create mode 100644 Documentation/filesystems/seq_file.rst delete mode 100644 Documentation/filesystems/seq_file.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index e2bbccf6d5bc..3c8c5b7908e0 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -32,6 +32,7 @@ algorithms work. mandatory-locking mount_api quota + seq_file automount-support diff --git a/Documentation/filesystems/seq_file.rst b/Documentation/filesystems/seq_file.rst new file mode 100644 index 000000000000..fab302046b13 --- /dev/null +++ b/Documentation/filesystems/seq_file.rst @@ -0,0 +1,372 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================== +The seq_file Interface +====================== + + Copyright 2003 Jonathan Corbet + + This file is originally from the LWN.net Driver Porting series at + http://lwn.net/Articles/driver-porting/ + + +There are numerous ways for a device driver (or other kernel component) to +provide information to the user or system administrator. One useful +technique is the creation of virtual files, in debugfs, /proc or elsewhere. +Virtual files can provide human-readable output that is easy to get at +without any special utility programs; they can also make life easier for +script writers. It is not surprising that the use of virtual files has +grown over the years. + +Creating those files correctly has always been a bit of a challenge, +however. It is not that hard to make a virtual file which returns a +string. 
But life gets trickier if the output is long - anything greater +than an application is likely to read in a single operation. Handling +multiple reads (and seeks) requires careful attention to the reader's +position within the virtual file - that position is, likely as not, in the +middle of a line of output. The kernel has traditionally had a number of +implementations that got this wrong. + +The 2.6 kernel contains a set of functions (implemented by Alexander Viro) +which are designed to make it easy for virtual file creators to get it +right. + +The seq_file interface is available via <linux/seq_file.h>. There are +three aspects to seq_file: + + * An iterator interface which lets a virtual file implementation + step through the objects it is presenting. + + * Some utility functions for formatting objects for output without + needing to worry about things like output buffers. + + * A set of canned file_operations which implement most operations on + the virtual file. + +We'll look at the seq_file interface via an extremely simple example: a +loadable module which creates a file called /proc/sequence. The file, when +read, simply produces a set of increasing integer values, one per line. The +sequence will continue until the user loses patience and finds something +better to do. The file is seekable, in that one can do something like the +following:: + + dd if=/proc/sequence of=out1 count=1 + dd if=/proc/sequence skip=1 of=out2 count=1 + +Then concatenate the output files out1 and out2 and get the right +result. Yes, it is a thoroughly useless module, but the point is to show +how the mechanism works without getting lost in other details. (Those +wanting to see the full source for this module can find it at +http://lwn.net/Articles/22359/). + +Deprecated create_proc_entry +============================ + +Note that the above article uses create_proc_entry which was removed in +kernel 3.10. 
Current versions require the following update:: + + - entry = create_proc_entry("sequence", 0, NULL); + - if (entry) + - entry->proc_fops = &ct_file_ops; + + entry = proc_create("sequence", 0, NULL, &ct_file_ops); + +The iterator interface +====================== + +Modules implementing a virtual file with seq_file must implement an +iterator object that allows stepping through the data of interest +during a "session" (roughly one read() system call). If the iterator +is able to move to a specific position - like the file they implement, +though with freedom to map the position number to a sequence location +in whatever way is convenient - the iterator need only exist +transiently during a session. If the iterator cannot easily find a +numerical position but works well with a first/next interface, the +iterator can be stored in the private data area and continue from one +session to the next. + +A seq_file implementation that is formatting firewall rules from a +table, for example, could provide a simple iterator that interprets +position N as the Nth rule in the chain. A seq_file implementation +that presents the content of a, potentially volatile, linked list +might record a pointer into that list, providing that can be done +without risk of the current location being removed. + +Positioning can thus be done in whatever way makes the most sense for +the generator of the data, which need not be aware of how a position +translates to an offset in the virtual file. The one obvious exception +is that a position of zero should indicate the beginning of the file. + +The /proc/sequence iterator just uses the count of the next number it +will output as its position. + +Four functions must be implemented to make the iterator work. The +first, called start(), starts a session and takes a position as an +argument, returning an iterator which will start reading at that +position. 
The pos passed to start() will always be either zero, or +the most recent pos used in the previous session. + +For our simple sequence example, +the start() function looks like:: + + static void *ct_seq_start(struct seq_file *s, loff_t *pos) + { + loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL); + if (! spos) + return NULL; + *spos = *pos; + return spos; + } + +The entire data structure for this iterator is a single loff_t value +holding the current position. There is no upper bound for the sequence +iterator, but that will not be the case for most other seq_file +implementations; in most cases the start() function should check for a +"past end of file" condition and return NULL if need be. + +For more complicated applications, the private field of the seq_file +structure can be used to hold state from session to session. There is +also a special value which can be returned by the start() function +called SEQ_START_TOKEN; it can be used if you wish to instruct your +show() function (described below) to print a header at the top of the +output. SEQ_START_TOKEN should only be used if the offset is zero, +however. + +The next function to implement is called, amazingly, next(); its job is to +move the iterator forward to the next position in the sequence. The +example module can simply increment the position by one; more useful +modules will do what is needed to step through some data structure. The +next() function returns a new iterator, or NULL if the sequence is +complete. Here's the example version:: + + static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) + { + loff_t *spos = v; + *pos = ++*spos; + return spos; + } + +The stop() function closes a session; its job, of course, is to clean +up. If dynamic memory is allocated for the iterator, stop() is the +place to free it; if a lock was taken by start(), stop() must release +that lock. 
The value that ``*pos`` was set to by the last next() call +before stop() is remembered, and used for the first start() call of +the next session unless lseek() has been called on the file; in that +case next start() will be asked to start at position zero:: + + static void ct_seq_stop(struct seq_file *s, void *v) + { + kfree(v); + } + +Finally, the show() function should format the object currently pointed to +by the iterator for output. The example module's show() function is:: + + static int ct_seq_show(struct seq_file *s, void *v) + { + loff_t *spos = v; + seq_printf(s, "%lld\n", (long long)*spos); + return 0; + } + +If all is well, the show() function should return zero. A negative error +code in the usual manner indicates that something went wrong; it will be +passed back to user space. This function can also return SEQ_SKIP, which +causes the current item to be skipped; if the show() function has already +generated output before returning SEQ_SKIP, that output will be dropped. + +We will look at seq_printf() in a moment. But first, the definition of the +seq_file iterator is finished by creating a seq_operations structure with +the four functions we have just defined:: + + static const struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show + }; + +This structure will be needed to tie our iterator to the /proc file in +a little bit. + +It's worth noting that the iterator value returned by start() and +manipulated by the other functions is considered to be completely opaque by +the seq_file code. It can thus be anything that is useful in stepping +through the data to be output. Counters can be useful, but it could also be +a direct pointer into an array or linked list. Anything goes, as long as +the programmer is aware that things can happen between calls to the +iterator function. 
However, the seq_file code (by design) will not sleep +between the calls to start() and stop(), so holding a lock during that time +is a reasonable thing to do. The seq_file code will also avoid taking any +other locks while the iterator is active. + + +Formatted output +================ + +The seq_file code manages positioning within the output created by the +iterator and getting it into the user's buffer. But, for that to work, that +output must be passed to the seq_file code. Some utility functions have +been defined which make this task easy. + +Most code will simply use seq_printf(), which works pretty much like +printk(), but which requires the seq_file pointer as an argument. + +For straight character output, the following functions may be used:: + + seq_putc(struct seq_file *m, char c); + seq_puts(struct seq_file *m, const char *s); + seq_escape(struct seq_file *m, const char *s, const char *esc); + +The first two output a single character and a string, just like one would +expect. seq_escape() is like seq_puts(), except that any character in s +which is in the string esc will be represented in octal form in the output. + +There are also a pair of functions for printing filenames:: + + int seq_path(struct seq_file *m, const struct path *path, + const char *esc); + int seq_path_root(struct seq_file *m, const struct path *path, + const struct path *root, const char *esc) + +Here, path indicates the file of interest, and esc is a set of characters +which should be escaped in the output. A call to seq_path() will output +the path relative to the current process's filesystem root. If a different +root is desired, it can be used with seq_path_root(). If it turns out that +path cannot be reached from root, seq_path_root() returns SEQ_SKIP. + +A function producing complicated output may want to check:: + + bool seq_has_overflowed(struct seq_file *m); + +and avoid further seq_ calls if true is returned. 
+ +A true return from seq_has_overflowed means that the seq_file buffer will +be discarded and the seq_show function will attempt to allocate a larger +buffer and retry printing. + + +Making it all work +================== + +So far, we have a nice set of functions which can produce output within the +seq_file system, but we have not yet turned them into a file that a user +can see. Creating a file within the kernel requires, of course, the +creation of a set of file_operations which implement the operations on that +file. The seq_file interface provides a set of canned operations which do +most of the work. The virtual file author still must implement the open() +method, however, to hook everything up. The open function is often a single +line, as in the example module:: + + static int ct_open(struct inode *inode, struct file *file) + { + return seq_open(file, &ct_seq_ops); + } + +Here, the call to seq_open() takes the seq_operations structure we created +before, and gets set up to iterate through the virtual file. + +On a successful open, seq_open() stores the struct seq_file pointer in +file->private_data. If you have an application where the same iterator can +be used for more than one file, you can store an arbitrary pointer in the +private field of the seq_file structure; that value can then be retrieved +by the iterator functions. + +There is also a wrapper function to seq_open() called seq_open_private(). It +kmallocs a zero filled block of memory and stores a pointer to it in the +private field of the seq_file structure, returning 0 on success. 
The +block size is specified in a third parameter to the function, e.g.:: + + static int ct_open(struct inode *inode, struct file *file) + { + return seq_open_private(file, &ct_seq_ops, + sizeof(struct mystruct)); + } + +There is also a variant function, __seq_open_private(), which is functionally +identical except that, if successful, it returns the pointer to the allocated +memory block, allowing further initialisation e.g.:: + + static int ct_open(struct inode *inode, struct file *file) + { + struct mystruct *p = + __seq_open_private(file, &ct_seq_ops, sizeof(*p)); + + if (!p) + return -ENOMEM; + + p->foo = bar; /* initialize my stuff */ + ... + p->baz = true; + + return 0; + } + +A corresponding close function, seq_release_private() is available which +frees the memory allocated in the corresponding open. + +The other operations of interest - read(), llseek(), and release() - are +all implemented by the seq_file code itself. So a virtual file's +file_operations structure will look like:: + + static const struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release + }; + +There is also a seq_release_private() which passes the contents of the +seq_file private field to kfree() before releasing the structure. + +The final step is the creation of the /proc file itself. In the example +code, that is done in the initialization code in the usual way:: + + static int ct_init(void) + { + struct proc_dir_entry *entry; + + proc_create("sequence", 0, NULL, &ct_file_ops); + return 0; + } + + module_init(ct_init); + +And that is pretty much it. 
+ + +seq_list +======== + +If your file will be iterating through a linked list, you may find these +routines useful:: + + struct list_head *seq_list_start(struct list_head *head, + loff_t pos); + struct list_head *seq_list_start_head(struct list_head *head, + loff_t pos); + struct list_head *seq_list_next(void *v, struct list_head *head, + loff_t *ppos); + +These helpers will interpret pos as a position within the list and iterate +accordingly. Your start() and next() functions need only invoke the +``seq_list_*`` helpers with a pointer to the appropriate list_head structure. + + +The extra-simple version +======================== + +For extremely simple virtual files, there is an even easier interface. A +module can define only the show() function, which should create all the +output that the virtual file will contain. The file's open() method then +calls:: + + int single_open(struct file *file, + int (*show)(struct seq_file *m, void *p), + void *data); + +When output time comes, the show() function will be called once. The data +value given to single_open() can be found in the private field of the +seq_file structure. When using single_open(), the programmer should use +single_release() instead of seq_release() in the file_operations structure +to avoid a memory leak. diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt deleted file mode 100644 index d412b236a9d6..000000000000 --- a/Documentation/filesystems/seq_file.txt +++ /dev/null @@ -1,359 +0,0 @@ -The seq_file interface - - Copyright 2003 Jonathan Corbet - This file is originally from the LWN.net Driver Porting series at - http://lwn.net/Articles/driver-porting/ - - -There are numerous ways for a device driver (or other kernel component) to -provide information to the user or system administrator. One useful -technique is the creation of virtual files, in debugfs, /proc or elsewhere. 
-Virtual files can provide human-readable output that is easy to get at -without any special utility programs; they can also make life easier for -script writers. It is not surprising that the use of virtual files has -grown over the years. - -Creating those files correctly has always been a bit of a challenge, -however. It is not that hard to make a virtual file which returns a -string. But life gets trickier if the output is long - anything greater -than an application is likely to read in a single operation. Handling -multiple reads (and seeks) requires careful attention to the reader's -position within the virtual file - that position is, likely as not, in the -middle of a line of output. The kernel has traditionally had a number of -implementations that got this wrong. - -The 2.6 kernel contains a set of functions (implemented by Alexander Viro) -which are designed to make it easy for virtual file creators to get it -right. - -The seq_file interface is available via . There are -three aspects to seq_file: - - * An iterator interface which lets a virtual file implementation - step through the objects it is presenting. - - * Some utility functions for formatting objects for output without - needing to worry about things like output buffers. - - * A set of canned file_operations which implement most operations on - the virtual file. - -We'll look at the seq_file interface via an extremely simple example: a -loadable module which creates a file called /proc/sequence. The file, when -read, simply produces a set of increasing integer values, one per line. The -sequence will continue until the user loses patience and finds something -better to do. The file is seekable, in that one can do something like the -following: - - dd if=/proc/sequence of=out1 count=1 - dd if=/proc/sequence skip=1 of=out2 count=1 - -Then concatenate the output files out1 and out2 and get the right -result. 
Yes, it is a thoroughly useless module, but the point is to show -how the mechanism works without getting lost in other details. (Those -wanting to see the full source for this module can find it at -http://lwn.net/Articles/22359/). - -Deprecated create_proc_entry - -Note that the above article uses create_proc_entry which was removed in -kernel 3.10. Current versions require the following update - -- entry = create_proc_entry("sequence", 0, NULL); -- if (entry) -- entry->proc_fops = &ct_file_ops; -+ entry = proc_create("sequence", 0, NULL, &ct_file_ops); - -The iterator interface - -Modules implementing a virtual file with seq_file must implement an -iterator object that allows stepping through the data of interest -during a "session" (roughly one read() system call). If the iterator -is able to move to a specific position - like the file they implement, -though with freedom to map the position number to a sequence location -in whatever way is convenient - the iterator need only exist -transiently during a session. If the iterator cannot easily find a -numerical position but works well with a first/next interface, the -iterator can be stored in the private data area and continue from one -session to the next. - -A seq_file implementation that is formatting firewall rules from a -table, for example, could provide a simple iterator that interprets -position N as the Nth rule in the chain. A seq_file implementation -that presents the content of a, potentially volatile, linked list -might record a pointer into that list, providing that can be done -without risk of the current location being removed. - -Positioning can thus be done in whatever way makes the most sense for -the generator of the data, which need not be aware of how a position -translates to an offset in the virtual file. The one obvious exception -is that a position of zero should indicate the beginning of the file. 
- -The /proc/sequence iterator just uses the count of the next number it -will output as its position. - -Four functions must be implemented to make the iterator work. The -first, called start(), starts a session and takes a position as an -argument, returning an iterator which will start reading at that -position. The pos passed to start() will always be either zero, or -the most recent pos used in the previous session. - -For our simple sequence example, -the start() function looks like: - - static void *ct_seq_start(struct seq_file *s, loff_t *pos) - { - loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL); - if (! spos) - return NULL; - *spos = *pos; - return spos; - } - -The entire data structure for this iterator is a single loff_t value -holding the current position. There is no upper bound for the sequence -iterator, but that will not be the case for most other seq_file -implementations; in most cases the start() function should check for a -"past end of file" condition and return NULL if need be. - -For more complicated applications, the private field of the seq_file -structure can be used to hold state from session to session. There is -also a special value which can be returned by the start() function -called SEQ_START_TOKEN; it can be used if you wish to instruct your -show() function (described below) to print a header at the top of the -output. SEQ_START_TOKEN should only be used if the offset is zero, -however. - -The next function to implement is called, amazingly, next(); its job is to -move the iterator forward to the next position in the sequence. The -example module can simply increment the position by one; more useful -modules will do what is needed to step through some data structure. The -next() function returns a new iterator, or NULL if the sequence is -complete. 
Here's the example version: - - static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) - { - loff_t *spos = v; - *pos = ++*spos; - return spos; - } - -The stop() function closes a session; its job, of course, is to clean -up. If dynamic memory is allocated for the iterator, stop() is the -place to free it; if a lock was taken by start(), stop() must release -that lock. The value that *pos was set to by the last next() call -before stop() is remembered, and used for the first start() call of -the next session unless lseek() has been called on the file; in that -case next start() will be asked to start at position zero. - - static void ct_seq_stop(struct seq_file *s, void *v) - { - kfree(v); - } - -Finally, the show() function should format the object currently pointed to -by the iterator for output. The example module's show() function is: - - static int ct_seq_show(struct seq_file *s, void *v) - { - loff_t *spos = v; - seq_printf(s, "%lld\n", (long long)*spos); - return 0; - } - -If all is well, the show() function should return zero. A negative error -code in the usual manner indicates that something went wrong; it will be -passed back to user space. This function can also return SEQ_SKIP, which -causes the current item to be skipped; if the show() function has already -generated output before returning SEQ_SKIP, that output will be dropped. - -We will look at seq_printf() in a moment. But first, the definition of the -seq_file iterator is finished by creating a seq_operations structure with -the four functions we have just defined: - - static const struct seq_operations ct_seq_ops = { - .start = ct_seq_start, - .next = ct_seq_next, - .stop = ct_seq_stop, - .show = ct_seq_show - }; - -This structure will be needed to tie our iterator to the /proc file in -a little bit. - -It's worth noting that the iterator value returned by start() and -manipulated by the other functions is considered to be completely opaque by -the seq_file code. 
It can thus be anything that is useful in stepping -through the data to be output. Counters can be useful, but it could also be -a direct pointer into an array or linked list. Anything goes, as long as -the programmer is aware that things can happen between calls to the -iterator function. However, the seq_file code (by design) will not sleep -between the calls to start() and stop(), so holding a lock during that time -is a reasonable thing to do. The seq_file code will also avoid taking any -other locks while the iterator is active. - - -Formatted output - -The seq_file code manages positioning within the output created by the -iterator and getting it into the user's buffer. But, for that to work, that -output must be passed to the seq_file code. Some utility functions have -been defined which make this task easy. - -Most code will simply use seq_printf(), which works pretty much like -printk(), but which requires the seq_file pointer as an argument. - -For straight character output, the following functions may be used: - - seq_putc(struct seq_file *m, char c); - seq_puts(struct seq_file *m, const char *s); - seq_escape(struct seq_file *m, const char *s, const char *esc); - -The first two output a single character and a string, just like one would -expect. seq_escape() is like seq_puts(), except that any character in s -which is in the string esc will be represented in octal form in the output. - -There are also a pair of functions for printing filenames: - - int seq_path(struct seq_file *m, const struct path *path, - const char *esc); - int seq_path_root(struct seq_file *m, const struct path *path, - const struct path *root, const char *esc) - -Here, path indicates the file of interest, and esc is a set of characters -which should be escaped in the output. A call to seq_path() will output -the path relative to the current process's filesystem root. If a different -root is desired, it can be used with seq_path_root(). 
If it turns out that -path cannot be reached from root, seq_path_root() returns SEQ_SKIP. - -A function producing complicated output may want to check - bool seq_has_overflowed(struct seq_file *m); -and avoid further seq_ calls if true is returned. - -A true return from seq_has_overflowed means that the seq_file buffer will -be discarded and the seq_show function will attempt to allocate a larger -buffer and retry printing. - - -Making it all work - -So far, we have a nice set of functions which can produce output within the -seq_file system, but we have not yet turned them into a file that a user -can see. Creating a file within the kernel requires, of course, the -creation of a set of file_operations which implement the operations on that -file. The seq_file interface provides a set of canned operations which do -most of the work. The virtual file author still must implement the open() -method, however, to hook everything up. The open function is often a single -line, as in the example module: - - static int ct_open(struct inode *inode, struct file *file) - { - return seq_open(file, &ct_seq_ops); - } - -Here, the call to seq_open() takes the seq_operations structure we created -before, and gets set up to iterate through the virtual file. - -On a successful open, seq_open() stores the struct seq_file pointer in -file->private_data. If you have an application where the same iterator can -be used for more than one file, you can store an arbitrary pointer in the -private field of the seq_file structure; that value can then be retrieved -by the iterator functions. - -There is also a wrapper function to seq_open() called seq_open_private(). It -kmallocs a zero filled block of memory and stores a pointer to it in the -private field of the seq_file structure, returning 0 on success. 
The -block size is specified in a third parameter to the function, e.g.: - - static int ct_open(struct inode *inode, struct file *file) - { - return seq_open_private(file, &ct_seq_ops, - sizeof(struct mystruct)); - } - -There is also a variant function, __seq_open_private(), which is functionally -identical except that, if successful, it returns the pointer to the allocated -memory block, allowing further initialisation e.g.: - - static int ct_open(struct inode *inode, struct file *file) - { - struct mystruct *p = - __seq_open_private(file, &ct_seq_ops, sizeof(*p)); - - if (!p) - return -ENOMEM; - - p->foo = bar; /* initialize my stuff */ - ... - p->baz = true; - - return 0; - } - -A corresponding close function, seq_release_private() is available which -frees the memory allocated in the corresponding open. - -The other operations of interest - read(), llseek(), and release() - are -all implemented by the seq_file code itself. So a virtual file's -file_operations structure will look like: - - static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release - }; - -There is also a seq_release_private() which passes the contents of the -seq_file private field to kfree() before releasing the structure. - -The final step is the creation of the /proc file itself. In the example -code, that is done in the initialization code in the usual way: - - static int ct_init(void) - { - struct proc_dir_entry *entry; - - proc_create("sequence", 0, NULL, &ct_file_ops); - return 0; - } - - module_init(ct_init); - -And that is pretty much it. 
- - -seq_list - -If your file will be iterating through a linked list, you may find these -routines useful: - - struct list_head *seq_list_start(struct list_head *head, - loff_t pos); - struct list_head *seq_list_start_head(struct list_head *head, - loff_t pos); - struct list_head *seq_list_next(void *v, struct list_head *head, - loff_t *ppos); - -These helpers will interpret pos as a position within the list and iterate -accordingly. Your start() and next() functions need only invoke the -seq_list_* helpers with a pointer to the appropriate list_head structure. - - -The extra-simple version - -For extremely simple virtual files, there is an even easier interface. A -module can define only the show() function, which should create all the -output that the virtual file will contain. The file's open() method then -calls: - - int single_open(struct file *file, - int (*show)(struct seq_file *m, void *p), - void *data); - -When output time comes, the show() function will be called once. The data -value given to single_open() can be found in the private field of the -seq_file structure. When using single_open(), the programmer should use -single_release() instead of seq_release() in the file_operations structure -to avoid a memory leak. 
-- cgit v1.2.3 From cf06612c65e5dc86f42cdad0b9fc24a90fe18a07 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:12 +0200 Subject: docs: filesystems: convert sharedsubtree.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6692b8abc177130e9e53aace94117a2ad076cab5.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/proc.rst | 2 +- Documentation/filesystems/sharedsubtree.rst | 995 ++++++++++++++++++++++++++++ Documentation/filesystems/sharedsubtree.txt | 939 -------------------------- 4 files changed, 997 insertions(+), 940 deletions(-) create mode 100644 Documentation/filesystems/sharedsubtree.rst delete mode 100644 Documentation/filesystems/sharedsubtree.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 3c8c5b7908e0..c658ddee40fa 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -33,6 +33,7 @@ algorithms work. mount_api quota seq_file + sharedsubtree automount-support diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 38b606991065..a567bcccbb02 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -1870,7 +1870,7 @@ unbindable mount is unbindable For more information on mount propagation see: - Documentation/filesystems/sharedsubtree.txt + Documentation/filesystems/sharedsubtree.rst 3.6 /proc//comm & /proc//task//comm diff --git a/Documentation/filesystems/sharedsubtree.rst b/Documentation/filesystems/sharedsubtree.rst new file mode 100644 index 000000000000..d83395354250 --- /dev/null +++ b/Documentation/filesystems/sharedsubtree.rst @@ -0,0 +1,995 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +=============== +Shared Subtrees +=============== + +.. Contents: + 1) Overview + 2) Features + 3) Setting mount states + 4) Use-case + 5) Detailed semantics + 6) Quiz + 7) FAQ + 8) Implementation + + +1) Overview +----------- + +Consider the following situation: + +A process wants to clone its own namespace, but still wants to access the CD +that got mounted recently. Shared subtree semantics provide the necessary +mechanism to accomplish the above. + +It provides the necessary building blocks for features like per-user-namespace +and versioned filesystem. + +2) Features +----------- + +Shared subtree provides four different flavors of mounts; struct vfsmount to be +precise + + a. shared mount + b. slave mount + c. private mount + d. unbindable mount + + +2a) A shared mount can be replicated to as many mountpoints and all the +replicas continue to be exactly same. + + Here is an example: + + Let's say /mnt has a mount that is shared:: + + mount --make-shared /mnt + + Note: mount(8) command now supports the --make-shared flag, + so the sample 'smount' program is no longer needed and has been + removed. + + :: + + # mount --bind /mnt /tmp + + The above command replicates the mount at /mnt to the mountpoint /tmp + and the contents of both the mounts remain identical. + + :: + + #ls /mnt + a b c + + #ls /tmp + a b c + + Now let's say we mount a device at /tmp/a:: + + # mount /dev/sd0 /tmp/a + + #ls /tmp/a + t1 t2 t3 + + #ls /mnt/a + t1 t2 t3 + + Note that the mount has propagated to the mount at /mnt as well. + + And the same is true even when /dev/sd0 is mounted on /mnt/a. The + contents will be visible under /tmp/a too. + + +2b) A slave mount is like a shared mount except that mount and umount events + only propagate towards it. + + All slave mounts have a master mount which is a shared. + + Here is an example: + + Let's say /mnt has a mount which is shared. 
+ # mount --make-shared /mnt + + Let's bind mount /mnt to /tmp + # mount --bind /mnt /tmp + + the new mount at /tmp becomes a shared mount and it is a replica of + the mount at /mnt. + + Now let's make the mount at /tmp; a slave of /mnt + # mount --make-slave /tmp + + let's mount /dev/sd0 on /mnt/a + # mount /dev/sd0 /mnt/a + + #ls /mnt/a + t1 t2 t3 + + #ls /tmp/a + t1 t2 t3 + + Note the mount event has propagated to the mount at /tmp + + However let's see what happens if we mount something on the mount at /tmp + + # mount /dev/sd1 /tmp/b + + #ls /tmp/b + s1 s2 s3 + + #ls /mnt/b + + Note how the mount event has not propagated to the mount at + /mnt + + +2c) A private mount does not forward or receive propagation. + + This is the mount we are familiar with. It's the default type. + + +2d) An unbindable mount is an unbindable private mount + + let's say we have a mount at /mnt and we make it unbindable:: + + # mount --make-unbindable /mnt + + Let's try to bind mount this mount somewhere else:: + + # mount --bind /mnt /tmp + mount: wrong fs type, bad option, bad superblock on /mnt, + or too many mounted file systems + + Binding an unbindable mount is an invalid operation. + + +3) Setting mount states + + The mount command (util-linux package) can be used to set mount + states:: + + mount --make-shared mountpoint + mount --make-slave mountpoint + mount --make-private mountpoint + mount --make-unbindable mountpoint + + +4) Use cases +------------ + + A) A process wants to clone its own namespace, but still wants to + access the CD that got mounted recently. + + Solution: + + The system administrator can make the mount at /cdrom shared:: + + mount --bind /cdrom /cdrom + mount --make-shared /cdrom + + Now any process that clones off a new namespace will have a + mount at /cdrom which is a replica of the same mount in the + parent namespace. 
+ + So when a CD is inserted and mounted at /cdrom that mount gets + propagated to the other mount at /cdrom in all the other clone + namespaces. + + B) A process wants its mounts invisible to any other process, but + still be able to see the other system mounts. + + Solution: + + To begin with, the administrator can mark the entire mount tree + as shareable:: + + mount --make-rshared / + + A new process can clone off a new namespace. And mark some part + of its namespace as slave:: + + mount --make-rslave /myprivatetree + + Henceforth any mounts within the /myprivatetree done by the + process will not show up in any other namespace. However mounts + done in the parent namespace under /myprivatetree still show + up in the process's namespace. + + + Apart from the above semantics this feature provides the + building blocks to solve the following problems: + + C) Per-user namespace + + The above semantics allows a way to share mounts across + namespaces. But namespaces are associated with processes. If + namespaces are made first class objects with user API to + associate/disassociate a namespace with userid, then each user + could have his/her own namespace and tailor it to his/her + requirements. This needs to be supported in PAM. + + D) Versioned files + + If the entire mount tree is visible at multiple locations, then + an underlying versioning file system can return different + versions of the file depending on the path used to access that + file. + + An example is:: + + mount --make-shared / + mount --rbind / /view/v1 + mount --rbind / /view/v2 + mount --rbind / /view/v3 + mount --rbind / /view/v4 + + and if /usr has a versioning filesystem mounted, then that + mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and + /view/v4/usr too + + A user can request v3 version of the file /usr/fs/namespace.c + by accessing /view/v3/usr/fs/namespace.c . 
The underlying + versioning filesystem can then decipher that v3 version of the + filesystem is being requested and return the corresponding + inode. + +5) Detailed semantics +--------------------- + The section below explains the detailed semantics of + bind, rbind, move, mount, umount and clone-namespace operations. + + Note: the word 'vfsmount' and the noun 'mount' have been used + to mean the same thing, throughout this document. + +5a) Mount states + + A given mount can be in one of the following states + + 1) shared + 2) slave + 3) shared and slave + 4) private + 5) unbindable + + A 'propagation event' is defined as event generated on a vfsmount + that leads to mount or unmount actions in other vfsmounts. + + A 'peer group' is defined as a group of vfsmounts that propagate + events to each other. + + (1) Shared mounts + + A 'shared mount' is defined as a vfsmount that belongs to a + 'peer group'. + + For example:: + + mount --make-shared /mnt + mount --bind /mnt /tmp + + The mount at /mnt and that at /tmp are both shared and belong + to the same peer group. Anything mounted or unmounted under + /mnt or /tmp reflect in all the other mounts of its peer + group. + + + (2) Slave mounts + + A 'slave mount' is defined as a vfsmount that receives + propagation events and does not forward propagation events. + + A slave mount as the name implies has a master mount from which + mount/unmount events are received. Events do not propagate from + the slave mount to the master. Only a shared mount can be made + a slave by executing the following command:: + + mount --make-slave mount + + A shared mount that is made as a slave is no more shared unless + modified to become shared. + + (3) Shared and Slave + + A vfsmount can be both shared as well as slave. This state + indicates that the mount is a slave of some vfsmount, and + has its own peer group too. 
This vfsmount receives propagation + events from its master vfsmount, and also forwards propagation + events to its 'peer group' and to its slave vfsmounts. + + Strictly speaking, the vfsmount is shared having its own + peer group, and this peer-group is a slave of some other + peer group. + + Only a slave vfsmount can be made as 'shared and slave' by + either executing the following command:: + + mount --make-shared mount + + or by moving the slave vfsmount under a shared vfsmount. + + (4) Private mount + + A 'private mount' is defined as vfsmount that does not + receive or forward any propagation events. + + (5) Unbindable mount + + A 'unbindable mount' is defined as vfsmount that does not + receive or forward any propagation events and cannot + be bind mounted. + + + State diagram: + + The state diagram below explains the state transition of a mount, + in response to various commands:: + + ----------------------------------------------------------------------- + | |make-shared | make-slave | make-private |make-unbindab| + --------------|------------|--------------|--------------|-------------| + |shared |shared |*slave/private| private | unbindable | + | | | | | | + |-------------|------------|--------------|--------------|-------------| + |slave |shared | **slave | private | unbindable | + | |and slave | | | | + |-------------|------------|--------------|--------------|-------------| + |shared |shared | slave | private | unbindable | + |and slave |and slave | | | | + |-------------|------------|--------------|--------------|-------------| + |private |shared | **private | private | unbindable | + |-------------|------------|--------------|--------------|-------------| + |unbindable |shared |**unbindable | private | unbindable | + ------------------------------------------------------------------------ + + * if the shared mount is the only mount in its peer group, making it + slave, makes it private automatically. 
Note that there is no master to + which it can be slaved to. + + ** slaving a non-shared mount has no effect on the mount. + + Apart from the commands listed below, the 'move' operation also changes + the state of a mount depending on type of the destination mount. Its + explained in section 5d. + +5b) Bind semantics + + Consider the following command:: + + mount --bind A/a B/b + + where 'A' is the source mount, 'a' is the dentry in the mount 'A', 'B' + is the destination mount and 'b' is the dentry in the destination mount. + + The outcome depends on the type of mount of 'A' and 'B'. The table + below contains quick reference:: + + -------------------------------------------------------------------------- + | BIND MOUNT OPERATION | + |************************************************************************| + |source(A)->| shared | private | slave | unbindable | + | dest(B) | | | | | + | | | | | | | + | v | | | | | + |************************************************************************| + | shared | shared | shared | shared & slave | invalid | + | | | | | | + |non-shared| shared | private | slave | invalid | + ************************************************************************** + + Details: + + 1. 'A' is a shared mount and 'B' is a shared mount. A new mount 'C' + which is clone of 'A', is created. Its root dentry is 'a' . 'C' is + mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ... + are created and mounted at the dentry 'b' on all mounts where 'B' + propagates to. A new propagation tree containing 'C1',..,'Cn' is + created. This propagation tree is identical to the propagation tree of + 'B'. And finally the peer-group of 'C' is merged with the peer group + of 'A'. + + 2. 'A' is a private mount and 'B' is a shared mount. A new mount 'C' + which is clone of 'A', is created. Its root dentry is 'a'. 'C' is + mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ... 
+ are created and mounted at the dentry 'b' on all mounts where 'B' + propagates to. A new propagation tree is set containing all new mounts + 'C', 'C1', .., 'Cn' with exactly the same configuration as the + propagation tree for 'B'. + + 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. A new + mount 'C' which is clone of 'A', is created. Its root dentry is 'a' . + 'C' is mounted on mount 'B' at dentry 'b'. Also new mounts 'C1', 'C2', + 'C3' ... are created and mounted at the dentry 'b' on all mounts where + 'B' propagates to. A new propagation tree containing the new mounts + 'C','C1',.. 'Cn' is created. This propagation tree is identical to the + propagation tree for 'B'. And finally the mount 'C' and its peer group + is made the slave of mount 'Z'. In other words, mount 'C' is in the + state 'slave and shared'. + + 4. 'A' is a unbindable mount and 'B' is a shared mount. This is a + invalid operation. + + 5. 'A' is a private mount and 'B' is a non-shared(private or slave or + unbindable) mount. A new mount 'C' which is clone of 'A', is created. + Its root dentry is 'a'. 'C' is mounted on mount 'B' at dentry 'b'. + + 6. 'A' is a shared mount and 'B' is a non-shared mount. A new mount 'C' + which is a clone of 'A' is created. Its root dentry is 'a'. 'C' is + mounted on mount 'B' at dentry 'b'. 'C' is made a member of the + peer-group of 'A'. + + 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. A + new mount 'C' which is a clone of 'A' is created. Its root dentry is + 'a'. 'C' is mounted on mount 'B' at dentry 'b'. Also 'C' is set as a + slave mount of 'Z'. In other words 'A' and 'C' are both slave mounts of + 'Z'. All mount/unmount events on 'Z' propagates to 'A' and 'C'. But + mount/unmount on 'A' do not propagate anywhere else. Similarly + mount/unmount on 'C' do not propagate anywhere else. + + 8. 'A' is a unbindable mount and 'B' is a non-shared mount. This is a + invalid operation. A unbindable mount cannot be bind mounted. 
+ +5c) Rbind semantics + + rbind is same as bind. Bind replicates the specified mount. Rbind + replicates all the mounts in the tree belonging to the specified mount. + Rbind mount is bind mount applied to all the mounts in the tree. + + If the source tree that is rbind has some unbindable mounts, + then the subtree under the unbindable mount is pruned in the new + location. + + eg: + + let's say we have the following mount tree:: + + A + / \ + B C + / \ / \ + D E F G + + Let's say all the mount except the mount C in the tree are + of a type other than unbindable. + + If this tree is rbound to say Z + + We will have the following tree at the new location:: + + Z + | + A' + / + B' Note how the tree under C is pruned + / \ in the new location. + D' E' + + + +5d) Move semantics + + Consider the following command + + mount --move A B/b + + where 'A' is the source mount, 'B' is the destination mount and 'b' is + the dentry in the destination mount. + + The outcome depends on the type of the mount of 'A' and 'B'. The table + below is a quick reference:: + + --------------------------------------------------------------------------- + | MOVE MOUNT OPERATION | + |************************************************************************** + | source(A)->| shared | private | slave | unbindable | + | dest(B) | | | | | + | | | | | | | + | v | | | | | + |************************************************************************** + | shared | shared | shared |shared and slave| invalid | + | | | | | | + |non-shared| shared | private | slave | unbindable | + *************************************************************************** + + .. Note:: moving a mount residing under a shared mount is invalid. + + Details follow: + + 1. 'A' is a shared mount and 'B' is a shared mount. The mount 'A' is + mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', 'A2'...'An' + are created and mounted at dentry 'b' on all mounts that receive + propagation from mount 'B'. 
A new propagation tree is created in the + exact same configuration as that of 'B'. This new propagation tree + contains all the new mounts 'A1', 'A2'... 'An'. And this new + propagation tree is appended to the already existing propagation tree + of 'A'. + + 2. 'A' is a private mount and 'B' is a shared mount. The mount 'A' is + mounted on mount 'B' at dentry 'b'. Also new mount 'A1', 'A2'... 'An' + are created and mounted at dentry 'b' on all mounts that receive + propagation from mount 'B'. The mount 'A' becomes a shared mount and a + propagation tree is created which is identical to that of + 'B'. This new propagation tree contains all the new mounts 'A1', + 'A2'... 'An'. + + 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. The + mount 'A' is mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', + 'A2'... 'An' are created and mounted at dentry 'b' on all mounts that + receive propagation from mount 'B'. A new propagation tree is created + in the exact same configuration as that of 'B'. This new propagation + tree contains all the new mounts 'A1', 'A2'... 'An'. And this new + propagation tree is appended to the already existing propagation tree of + 'A'. Mount 'A' continues to be the slave mount of 'Z' but it also + becomes 'shared'. + + 4. 'A' is a unbindable mount and 'B' is a shared mount. The operation + is invalid. Because mounting anything on the shared mount 'B' can + create new mounts that get mounted on the mounts that receive + propagation from 'B'. And since the mount 'A' is unbindable, cloning + it to mount at other mountpoints is not possible. + + 5. 'A' is a private mount and 'B' is a non-shared(private or slave or + unbindable) mount. The mount 'A' is mounted on mount 'B' at dentry 'b'. + + 6. 'A' is a shared mount and 'B' is a non-shared mount. The mount 'A' + is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a + shared mount. + + 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. 
+ The mount 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' + continues to be a slave mount of mount 'Z'. + + 8. 'A' is a unbindable mount and 'B' is a non-shared mount. The mount + 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a + unbindable mount. + +5e) Mount semantics + + Consider the following command:: + + mount device B/b + + 'B' is the destination mount and 'b' is the dentry in the destination + mount. + + The above operation is the same as bind operation with the exception + that the source mount is always a private mount. + + +5f) Unmount semantics + + Consider the following command:: + + umount A + + where 'A' is a mount mounted on mount 'B' at dentry 'b'. + + If mount 'B' is shared, then all most-recently-mounted mounts at dentry + 'b' on mounts that receive propagation from mount 'B' and does not have + sub-mounts within them are unmounted. + + Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to + each other. + + let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount + 'B1', 'B2' and 'B3' respectively. + + let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on + mount 'B1', 'B2' and 'B3' respectively. + + if 'C1' is unmounted, all the mounts that are most-recently-mounted on + 'B1' and on the mounts that 'B1' propagates-to are unmounted. + + 'B1' propagates to 'B2' and 'B3'. And the most recently mounted mount + on 'B2' at dentry 'b' is 'C2', and that of mount 'B3' is 'C3'. + + So all 'C1', 'C2' and 'C3' should be unmounted. + + If any of 'C2' or 'C3' has some child mounts, then that mount is not + unmounted, but all other mounts are unmounted. However if 'C1' is told + to be unmounted and 'C1' has some sub-mounts, the umount operation is + failed entirely. + +5g) Clone Namespace + + A cloned namespace contains all the mounts as that of the parent + namespace. + + Let's say 'A' and 'B' are the corresponding mounts in the parent and the + child namespace. 
+ + If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to + each other. + + If 'A' is a slave mount of 'Z', then 'B' is also the slave mount of + 'Z'. + + If 'A' is a private mount, then 'B' is a private mount too. + + If 'A' is unbindable mount, then 'B' is a unbindable mount too. + + +6) Quiz + + A. What is the result of the following command sequence? + + :: + + mount --bind /mnt /mnt + mount --make-shared /mnt + mount --bind /mnt /tmp + mount --move /tmp /mnt/1 + + what should the contents of /mnt /mnt/1 /mnt/1/1 be? + Should they all be identical? or should /mnt and /mnt/1 be + identical only? + + + B. What is the result of the following command sequence? + + :: + + mount --make-rshared / + mkdir -p /v/1 + mount --rbind / /v/1 + + what should the content of /v/1/v/1 be? + + + C. What is the result of the following command sequence? + + :: + + mount --bind /mnt /mnt + mount --make-shared /mnt + mkdir -p /mnt/1/2/3 /mnt/1/test + mount --bind /mnt/1 /tmp + mount --make-slave /mnt + mount --make-shared /mnt + mount --bind /mnt/1/2 /tmp1 + mount --make-slave /mnt + + At this point we have the first mount at /tmp and + its root dentry is 1. Let's call this mount 'A' + And then we have a second mount at /tmp1 with root + dentry 2. Let's call this mount 'B' + Next we have a third mount at /mnt with root dentry + mnt. Let's call this mount 'C' + + 'B' is the slave of 'A' and 'C' is a slave of 'B' + A -> B -> C + + at this point if we execute the following command + + mount --bind /bin /tmp/test + + The mount is attempted on 'A' + + will the mount propagate to 'B' and 'C' ? + + what would the contents of + /mnt/1/test be? + +7) FAQ + + Q1. Why is bind mount needed? How is it different from symbolic links? + symbolic links can get stale if the destination mount gets + unmounted or moved. Bind mounts continue to exist even if the + other mount is unmounted or moved. + + Q2. Why can't the shared subtree be implemented using exportfs? 
+ + exportfs is a heavyweight way of accomplishing part of what + shared subtree can do. I cannot imagine a way to implement the + semantics of slave mount using exportfs? + + Q3 Why is unbindable mount needed? + + Let's say we want to replicate the mount tree at multiple + locations within the same subtree. + + if one rbind mounts a tree within the same subtree 'n' times + the number of mounts created is an exponential function of 'n'. + Having unbindable mount can help prune the unneeded bind + mounts. Here is an example. + + step 1: + let's say the root tree has just two directories with + one vfsmount:: + + root + / \ + tmp usr + + And we want to replicate the tree at multiple + mountpoints under /root/tmp + + step 2: + :: + + + mount --make-shared /root + + mkdir -p /tmp/m1 + + mount --rbind /root /tmp/m1 + + the new tree now looks like this:: + + root + / \ + tmp usr + / + m1 + / \ + tmp usr + / + m1 + + it has two vfsmounts + + step 3: + :: + + mkdir -p /tmp/m2 + mount --rbind /root /tmp/m2 + + the new tree now looks like this:: + + root + / \ + tmp usr + / \ + m1 m2 + / \ / \ + tmp usr tmp usr + / \ / + m1 m2 m1 + / \ / \ + tmp usr tmp usr + / / \ + m1 m1 m2 + / \ + tmp usr + / \ + m1 m2 + + it has 6 vfsmounts + + step 4: + :: + mkdir -p /tmp/m3 + mount --rbind /root /tmp/m3 + + I won't draw the tree..but it has 24 vfsmounts + + + at step i the number of vfsmounts is V[i] = i*V[i-1]. + This is an exponential function. And this tree has way more + mounts than what we really needed in the first place. + + One could use a series of umount at each step to prune + out the unneeded mounts. But there is a better solution. + Unclonable mounts come in handy here. 
+ + step 1: + let's say the root tree has just two directories with + one vfsmount:: + + root + / \ + tmp usr + + How do we set up the same tree at multiple locations under + /root/tmp + + step 2: + :: + + + mount --bind /root/tmp /root/tmp + + mount --make-rshared /root + mount --make-unbindable /root/tmp + + mkdir -p /tmp/m1 + + mount --rbind /root /tmp/m1 + + the new tree now looks like this:: + + root + / \ + tmp usr + / + m1 + / \ + tmp usr + + step 3: + :: + + mkdir -p /tmp/m2 + mount --rbind /root /tmp/m2 + + the new tree now looks like this:: + + root + / \ + tmp usr + / \ + m1 m2 + / \ / \ + tmp usr tmp usr + + step 4: + :: + + mkdir -p /tmp/m3 + mount --rbind /root /tmp/m3 + + the new tree now looks like this:: + + root + / \ + tmp usr + / \ \ + m1 m2 m3 + / \ / \ / \ + tmp usr tmp usr tmp usr + +8) Implementation + +8A) Datastructure + + 4 new fields are introduced to struct vfsmount: + + * ->mnt_share + * ->mnt_slave_list + * ->mnt_slave + * ->mnt_master + + ->mnt_share + links together all the mount to/from which this vfsmount + send/receives propagation events. + + ->mnt_slave_list + links all the mounts to which this vfsmount propagates + to. + + ->mnt_slave + links together all the slaves that its master vfsmount + propagates to. + + ->mnt_master + points to the master vfsmount from which this vfsmount + receives propagation. + + ->mnt_flags + takes two more flags to indicate the propagation status of + the vfsmount. MNT_SHARE indicates that the vfsmount is a shared + vfsmount. MNT_UNCLONABLE indicates that the vfsmount cannot be + replicated. + + All the shared vfsmounts in a peer group form a cyclic list through + ->mnt_share. + + All vfsmounts with the same ->mnt_master form on a cyclic list anchored + in ->mnt_master->mnt_slave_list and going through ->mnt_slave. + + ->mnt_master can point to arbitrary (and possibly different) members + of master peer group. 
To find all immediate slaves of a peer group + you need to go through _all_ ->mnt_slave_list of its members. + Conceptually it's just a single set - distribution among the + individual lists does not affect propagation or the way propagation + tree is modified by operations. + + All vfsmounts in a peer group have the same ->mnt_master. If it is + non-NULL, they form a contiguous (ordered) segment of slave list. + + A example propagation tree looks as shown in the figure below. + [ NOTE: Though it looks like a forest, if we consider all the shared + mounts as a conceptual entity called 'pnode', it becomes a tree]:: + + + A <--> B <--> C <---> D + /|\ /| |\ + / F G J K H I + / + E<-->K + /|\ + M L N + + In the above figure A,B,C and D all are shared and propagate to each + other. 'A' has got 3 slave mounts 'E' 'F' and 'G' 'C' has got 2 slave + mounts 'J' and 'K' and 'D' has got two slave mounts 'H' and 'I'. + 'E' is also shared with 'K' and they propagate to each other. And + 'K' has 3 slaves 'M', 'L' and 'N' + + A's ->mnt_share links with the ->mnt_share of 'B' 'C' and 'D' + + A's ->mnt_slave_list links with ->mnt_slave of 'E', 'K', 'F' and 'G' + + E's ->mnt_share links with ->mnt_share of K + + 'E', 'K', 'F', 'G' have their ->mnt_master point to struct vfsmount of 'A' + + 'M', 'L', 'N' have their ->mnt_master point to struct vfsmount of 'K' + + K's ->mnt_slave_list links with ->mnt_slave of 'M', 'L' and 'N' + + C's ->mnt_slave_list links with ->mnt_slave of 'J' and 'K' + + J and K's ->mnt_master points to struct vfsmount of C + + and finally D's ->mnt_slave_list links with ->mnt_slave of 'H' and 'I' + + 'H' and 'I' have their ->mnt_master pointing to struct vfsmount of 'D'. + + + NOTE: The propagation tree is orthogonal to the mount tree. + +8B Locking: + + ->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected + by namespace_sem (exclusive for modifications, shared for reading). 
+ + Normally we have ->mnt_flags modifications serialized by vfsmount_lock. + There are two exceptions: do_add_mount() and clone_mnt(). + The former modifies a vfsmount that has not been visible in any shared + data structures yet. + The latter holds namespace_sem and the only references to vfsmount + are in lists that can't be traversed without namespace_sem. + +8C Algorithm: + + The crux of the implementation resides in rbind/move operation. + + The overall algorithm breaks the operation into 3 phases: (look at + attach_recursive_mnt() and propagate_mnt()) + + 1. prepare phase. + 2. commit phases. + 3. abort phases. + + Prepare phase: + + for each mount in the source tree: + + a) Create the necessary number of mount trees to + be attached to each of the mounts that receive + propagation from the destination mount. + b) Do not attach any of the trees to its destination. + However note down its ->mnt_parent and ->mnt_mountpoint + c) Link all the new mounts to form a propagation tree that + is identical to the propagation tree of the destination + mount. + + If this phase is successful, there should be 'n' new + propagation trees; where 'n' is the number of mounts in the + source tree. Go to the commit phase + + Also there should be 'm' new mount trees, where 'm' is + the number of mounts to which the destination mount + propagates to. + + if any memory allocations fail, go to the abort phase. + + Commit phase + attach each of the mount trees to their corresponding + destination mounts. + + Abort phase + delete all the newly created trees. + + .. 
Note:: + all the propagation related functionality resides in the file pnode.c + + +------------------------------------------------------------------------ + +version 0.1 (created the initial document, Ram Pai linuxram@us.ibm.com) + +version 0.2 (Incorporated comments from Al Viro) diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt deleted file mode 100644 index 8ccfbd55244b..000000000000 --- a/Documentation/filesystems/sharedsubtree.txt +++ /dev/null @@ -1,939 +0,0 @@ -Shared Subtrees ---------------- - -Contents: - 1) Overview - 2) Features - 3) Setting mount states - 4) Use-case - 5) Detailed semantics - 6) Quiz - 7) FAQ - 8) Implementation - - -1) Overview ------------ - -Consider the following situation: - -A process wants to clone its own namespace, but still wants to access the CD -that got mounted recently. Shared subtree semantics provide the necessary -mechanism to accomplish the above. - -It provides the necessary building blocks for features like per-user-namespace -and versioned filesystem. - -2) Features ------------ - -Shared subtree provides four different flavors of mounts; struct vfsmount to be -precise - - a. shared mount - b. slave mount - c. private mount - d. unbindable mount - - -2a) A shared mount can be replicated to as many mountpoints and all the -replicas continue to be exactly same. - - Here is an example: - - Let's say /mnt has a mount that is shared. - mount --make-shared /mnt - - Note: mount(8) command now supports the --make-shared flag, - so the sample 'smount' program is no longer needed and has been - removed. - - # mount --bind /mnt /tmp - The above command replicates the mount at /mnt to the mountpoint /tmp - and the contents of both the mounts remain identical. 
- - #ls /mnt - a b c - - #ls /tmp - a b c - - Now let's say we mount a device at /tmp/a - # mount /dev/sd0 /tmp/a - - #ls /tmp/a - t1 t2 t3 - - #ls /mnt/a - t1 t2 t3 - - Note that the mount has propagated to the mount at /mnt as well. - - And the same is true even when /dev/sd0 is mounted on /mnt/a. The - contents will be visible under /tmp/a too. - - -2b) A slave mount is like a shared mount except that mount and umount events - only propagate towards it. - - All slave mounts have a master mount which is a shared. - - Here is an example: - - Let's say /mnt has a mount which is shared. - # mount --make-shared /mnt - - Let's bind mount /mnt to /tmp - # mount --bind /mnt /tmp - - the new mount at /tmp becomes a shared mount and it is a replica of - the mount at /mnt. - - Now let's make the mount at /tmp; a slave of /mnt - # mount --make-slave /tmp - - let's mount /dev/sd0 on /mnt/a - # mount /dev/sd0 /mnt/a - - #ls /mnt/a - t1 t2 t3 - - #ls /tmp/a - t1 t2 t3 - - Note the mount event has propagated to the mount at /tmp - - However let's see what happens if we mount something on the mount at /tmp - - # mount /dev/sd1 /tmp/b - - #ls /tmp/b - s1 s2 s3 - - #ls /mnt/b - - Note how the mount event has not propagated to the mount at - /mnt - - -2c) A private mount does not forward or receive propagation. - - This is the mount we are familiar with. Its the default type. - - -2d) A unbindable mount is a unbindable private mount - - let's say we have a mount at /mnt and we make it unbindable - - # mount --make-unbindable /mnt - - Let's try to bind mount this mount somewhere else. - # mount --bind /mnt /tmp - mount: wrong fs type, bad option, bad superblock on /mnt, - or too many mounted file systems - - Binding a unbindable mount is a invalid operation. 
- - -3) Setting mount states - - The mount command (util-linux package) can be used to set mount - states: - - mount --make-shared mountpoint - mount --make-slave mountpoint - mount --make-private mountpoint - mount --make-unbindable mountpoint - - -4) Use cases ------------- - - A) A process wants to clone its own namespace, but still wants to - access the CD that got mounted recently. - - Solution: - - The system administrator can make the mount at /cdrom shared - mount --bind /cdrom /cdrom - mount --make-shared /cdrom - - Now any process that clones off a new namespace will have a - mount at /cdrom which is a replica of the same mount in the - parent namespace. - - So when a CD is inserted and mounted at /cdrom that mount gets - propagated to the other mount at /cdrom in all the other clone - namespaces. - - B) A process wants its mounts invisible to any other process, but - still be able to see the other system mounts. - - Solution: - - To begin with, the administrator can mark the entire mount tree - as shareable. - - mount --make-rshared / - - A new process can clone off a new namespace. And mark some part - of its namespace as slave - - mount --make-rslave /myprivatetree - - Hence forth any mounts within the /myprivatetree done by the - process will not show up in any other namespace. However mounts - done in the parent namespace under /myprivatetree still shows - up in the process's namespace. - - - Apart from the above semantics this feature provides the - building blocks to solve the following problems: - - C) Per-user namespace - - The above semantics allows a way to share mounts across - namespaces. But namespaces are associated with processes. If - namespaces are made first class objects with user API to - associate/disassociate a namespace with userid, then each user - could have his/her own namespace and tailor it to his/her - requirements. This needs to be supported in PAM. 
- - D) Versioned files - - If the entire mount tree is visible at multiple locations, then - an underlying versioning file system can return different - versions of the file depending on the path used to access that - file. - - An example is: - - mount --make-shared / - mount --rbind / /view/v1 - mount --rbind / /view/v2 - mount --rbind / /view/v3 - mount --rbind / /view/v4 - - and if /usr has a versioning filesystem mounted, then that - mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and - /view/v4/usr too - - A user can request v3 version of the file /usr/fs/namespace.c - by accessing /view/v3/usr/fs/namespace.c . The underlying - versioning filesystem can then decipher that v3 version of the - filesystem is being requested and return the corresponding - inode. - -5) Detailed semantics: -------------------- - The section below explains the detailed semantics of - bind, rbind, move, mount, umount and clone-namespace operations. - - Note: the word 'vfsmount' and the noun 'mount' have been used - to mean the same thing, throughout this document. - -5a) Mount states - - A given mount can be in one of the following states - 1) shared - 2) slave - 3) shared and slave - 4) private - 5) unbindable - - A 'propagation event' is defined as event generated on a vfsmount - that leads to mount or unmount actions in other vfsmounts. - - A 'peer group' is defined as a group of vfsmounts that propagate - events to each other. - - (1) Shared mounts - - A 'shared mount' is defined as a vfsmount that belongs to a - 'peer group'. - - For example: - mount --make-shared /mnt - mount --bind /mnt /tmp - - The mount at /mnt and that at /tmp are both shared and belong - to the same peer group. Anything mounted or unmounted under - /mnt or /tmp reflect in all the other mounts of its peer - group. - - - (2) Slave mounts - - A 'slave mount' is defined as a vfsmount that receives - propagation events and does not forward propagation events. 
- - A slave mount as the name implies has a master mount from which - mount/unmount events are received. Events do not propagate from - the slave mount to the master. Only a shared mount can be made - a slave by executing the following command - - mount --make-slave mount - - A shared mount that is made as a slave is no more shared unless - modified to become shared. - - (3) Shared and Slave - - A vfsmount can be both shared as well as slave. This state - indicates that the mount is a slave of some vfsmount, and - has its own peer group too. This vfsmount receives propagation - events from its master vfsmount, and also forwards propagation - events to its 'peer group' and to its slave vfsmounts. - - Strictly speaking, the vfsmount is shared having its own - peer group, and this peer-group is a slave of some other - peer group. - - Only a slave vfsmount can be made as 'shared and slave' by - either executing the following command - mount --make-shared mount - or by moving the slave vfsmount under a shared vfsmount. - - (4) Private mount - - A 'private mount' is defined as vfsmount that does not - receive or forward any propagation events. - - (5) Unbindable mount - - A 'unbindable mount' is defined as vfsmount that does not - receive or forward any propagation events and cannot - be bind mounted. - - - State diagram: - The state diagram below explains the state transition of a mount, - in response to various commands. 
- ------------------------------------------------------------------------ - | |make-shared | make-slave | make-private |make-unbindab| - --------------|------------|--------------|--------------|-------------| - |shared |shared |*slave/private| private | unbindable | - | | | | | | - |-------------|------------|--------------|--------------|-------------| - |slave |shared | **slave | private | unbindable | - | |and slave | | | | - |-------------|------------|--------------|--------------|-------------| - |shared |shared | slave | private | unbindable | - |and slave |and slave | | | | - |-------------|------------|--------------|--------------|-------------| - |private |shared | **private | private | unbindable | - |-------------|------------|--------------|--------------|-------------| - |unbindable |shared |**unbindable | private | unbindable | - ------------------------------------------------------------------------ - - * if the shared mount is the only mount in its peer group, making it - slave, makes it private automatically. Note that there is no master to - which it can be slaved to. - - ** slaving a non-shared mount has no effect on the mount. - - Apart from the commands listed below, the 'move' operation also changes - the state of a mount depending on type of the destination mount. Its - explained in section 5d. - -5b) Bind semantics - - Consider the following command - - mount --bind A/a B/b - - where 'A' is the source mount, 'a' is the dentry in the mount 'A', 'B' - is the destination mount and 'b' is the dentry in the destination mount. - - The outcome depends on the type of mount of 'A' and 'B'. The table - below contains quick reference. 
- --------------------------------------------------------------------------- - | BIND MOUNT OPERATION | - |************************************************************************** - |source(A)->| shared | private | slave | unbindable | - | dest(B) | | | | | - | | | | | | | - | v | | | | | - |************************************************************************** - | shared | shared | shared | shared & slave | invalid | - | | | | | | - |non-shared| shared | private | slave | invalid | - *************************************************************************** - - Details: - - 1. 'A' is a shared mount and 'B' is a shared mount. A new mount 'C' - which is clone of 'A', is created. Its root dentry is 'a' . 'C' is - mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ... - are created and mounted at the dentry 'b' on all mounts where 'B' - propagates to. A new propagation tree containing 'C1',..,'Cn' is - created. This propagation tree is identical to the propagation tree of - 'B'. And finally the peer-group of 'C' is merged with the peer group - of 'A'. - - 2. 'A' is a private mount and 'B' is a shared mount. A new mount 'C' - which is clone of 'A', is created. Its root dentry is 'a'. 'C' is - mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ... - are created and mounted at the dentry 'b' on all mounts where 'B' - propagates to. A new propagation tree is set containing all new mounts - 'C', 'C1', .., 'Cn' with exactly the same configuration as the - propagation tree for 'B'. - - 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. A new - mount 'C' which is clone of 'A', is created. Its root dentry is 'a' . - 'C' is mounted on mount 'B' at dentry 'b'. Also new mounts 'C1', 'C2', - 'C3' ... are created and mounted at the dentry 'b' on all mounts where - 'B' propagates to. A new propagation tree containing the new mounts - 'C','C1',.. 'Cn' is created. 
This propagation tree is identical to the - propagation tree for 'B'. And finally the mount 'C' and its peer group - is made the slave of mount 'Z'. In other words, mount 'C' is in the - state 'slave and shared'. - - 4. 'A' is a unbindable mount and 'B' is a shared mount. This is a - invalid operation. - - 5. 'A' is a private mount and 'B' is a non-shared(private or slave or - unbindable) mount. A new mount 'C' which is clone of 'A', is created. - Its root dentry is 'a'. 'C' is mounted on mount 'B' at dentry 'b'. - - 6. 'A' is a shared mount and 'B' is a non-shared mount. A new mount 'C' - which is a clone of 'A' is created. Its root dentry is 'a'. 'C' is - mounted on mount 'B' at dentry 'b'. 'C' is made a member of the - peer-group of 'A'. - - 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. A - new mount 'C' which is a clone of 'A' is created. Its root dentry is - 'a'. 'C' is mounted on mount 'B' at dentry 'b'. Also 'C' is set as a - slave mount of 'Z'. In other words 'A' and 'C' are both slave mounts of - 'Z'. All mount/unmount events on 'Z' propagates to 'A' and 'C'. But - mount/unmount on 'A' do not propagate anywhere else. Similarly - mount/unmount on 'C' do not propagate anywhere else. - - 8. 'A' is a unbindable mount and 'B' is a non-shared mount. This is a - invalid operation. A unbindable mount cannot be bind mounted. - -5c) Rbind semantics - - rbind is same as bind. Bind replicates the specified mount. Rbind - replicates all the mounts in the tree belonging to the specified mount. - Rbind mount is bind mount applied to all the mounts in the tree. - - If the source tree that is rbind has some unbindable mounts, - then the subtree under the unbindable mount is pruned in the new - location. - - eg: let's say we have the following mount tree. - - A - / \ - B C - / \ / \ - D E F G - - Let's say all the mount except the mount C in the tree are - of a type other than unbindable. 
- - If this tree is rbound to say Z - - We will have the following tree at the new location. - - Z - | - A' - / - B' Note how the tree under C is pruned - / \ in the new location. - D' E' - - - -5d) Move semantics - - Consider the following command - - mount --move A B/b - - where 'A' is the source mount, 'B' is the destination mount and 'b' is - the dentry in the destination mount. - - The outcome depends on the type of the mount of 'A' and 'B'. The table - below is a quick reference. - --------------------------------------------------------------------------- - | MOVE MOUNT OPERATION | - |************************************************************************** - | source(A)->| shared | private | slave | unbindable | - | dest(B) | | | | | - | | | | | | | - | v | | | | | - |************************************************************************** - | shared | shared | shared |shared and slave| invalid | - | | | | | | - |non-shared| shared | private | slave | unbindable | - *************************************************************************** - NOTE: moving a mount residing under a shared mount is invalid. - - Details follow: - - 1. 'A' is a shared mount and 'B' is a shared mount. The mount 'A' is - mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', 'A2'...'An' - are created and mounted at dentry 'b' on all mounts that receive - propagation from mount 'B'. A new propagation tree is created in the - exact same configuration as that of 'B'. This new propagation tree - contains all the new mounts 'A1', 'A2'... 'An'. And this new - propagation tree is appended to the already existing propagation tree - of 'A'. - - 2. 'A' is a private mount and 'B' is a shared mount. The mount 'A' is - mounted on mount 'B' at dentry 'b'. Also new mount 'A1', 'A2'... 'An' - are created and mounted at dentry 'b' on all mounts that receive - propagation from mount 'B'. 
The mount 'A' becomes a shared mount and a - propagation tree is created which is identical to that of - 'B'. This new propagation tree contains all the new mounts 'A1', - 'A2'... 'An'. - - 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. The - mount 'A' is mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', - 'A2'... 'An' are created and mounted at dentry 'b' on all mounts that - receive propagation from mount 'B'. A new propagation tree is created - in the exact same configuration as that of 'B'. This new propagation - tree contains all the new mounts 'A1', 'A2'... 'An'. And this new - propagation tree is appended to the already existing propagation tree of - 'A'. Mount 'A' continues to be the slave mount of 'Z' but it also - becomes 'shared'. - - 4. 'A' is a unbindable mount and 'B' is a shared mount. The operation - is invalid. Because mounting anything on the shared mount 'B' can - create new mounts that get mounted on the mounts that receive - propagation from 'B'. And since the mount 'A' is unbindable, cloning - it to mount at other mountpoints is not possible. - - 5. 'A' is a private mount and 'B' is a non-shared(private or slave or - unbindable) mount. The mount 'A' is mounted on mount 'B' at dentry 'b'. - - 6. 'A' is a shared mount and 'B' is a non-shared mount. The mount 'A' - is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a - shared mount. - - 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. - The mount 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' - continues to be a slave mount of mount 'Z'. - - 8. 'A' is a unbindable mount and 'B' is a non-shared mount. The mount - 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a - unbindable mount. - -5e) Mount semantics - - Consider the following command - - mount device B/b - - 'B' is the destination mount and 'b' is the dentry in the destination - mount. 
- - The above operation is the same as bind operation with the exception - that the source mount is always a private mount. - - -5f) Unmount semantics - - Consider the following command - - umount A - - where 'A' is a mount mounted on mount 'B' at dentry 'b'. - - If mount 'B' is shared, then all most-recently-mounted mounts at dentry - 'b' on mounts that receive propagation from mount 'B' and does not have - sub-mounts within them are unmounted. - - Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to - each other. - - let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount - 'B1', 'B2' and 'B3' respectively. - - let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on - mount 'B1', 'B2' and 'B3' respectively. - - if 'C1' is unmounted, all the mounts that are most-recently-mounted on - 'B1' and on the mounts that 'B1' propagates-to are unmounted. - - 'B1' propagates to 'B2' and 'B3'. And the most recently mounted mount - on 'B2' at dentry 'b' is 'C2', and that of mount 'B3' is 'C3'. - - So all 'C1', 'C2' and 'C3' should be unmounted. - - If any of 'C2' or 'C3' has some child mounts, then that mount is not - unmounted, but all other mounts are unmounted. However if 'C1' is told - to be unmounted and 'C1' has some sub-mounts, the umount operation is - failed entirely. - -5g) Clone Namespace - - A cloned namespace contains all the mounts as that of the parent - namespace. - - Let's say 'A' and 'B' are the corresponding mounts in the parent and the - child namespace. - - If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to - each other. - - If 'A' is a slave mount of 'Z', then 'B' is also the slave mount of - 'Z'. - - If 'A' is a private mount, then 'B' is a private mount too. - - If 'A' is unbindable mount, then 'B' is a unbindable mount too. - - -6) Quiz - - A. What is the result of the following command sequence? 
- - mount --bind /mnt /mnt - mount --make-shared /mnt - mount --bind /mnt /tmp - mount --move /tmp /mnt/1 - - what should be the contents of /mnt /mnt/1 /mnt/1/1 should be? - Should they all be identical? or should /mnt and /mnt/1 be - identical only? - - - B. What is the result of the following command sequence? - - mount --make-rshared / - mkdir -p /v/1 - mount --rbind / /v/1 - - what should be the content of /v/1/v/1 be? - - - C. What is the result of the following command sequence? - - mount --bind /mnt /mnt - mount --make-shared /mnt - mkdir -p /mnt/1/2/3 /mnt/1/test - mount --bind /mnt/1 /tmp - mount --make-slave /mnt - mount --make-shared /mnt - mount --bind /mnt/1/2 /tmp1 - mount --make-slave /mnt - - At this point we have the first mount at /tmp and - its root dentry is 1. Let's call this mount 'A' - And then we have a second mount at /tmp1 with root - dentry 2. Let's call this mount 'B' - Next we have a third mount at /mnt with root dentry - mnt. Let's call this mount 'C' - - 'B' is the slave of 'A' and 'C' is a slave of 'B' - A -> B -> C - - at this point if we execute the following command - - mount --bind /bin /tmp/test - - The mount is attempted on 'A' - - will the mount propagate to 'B' and 'C' ? - - what would be the contents of - /mnt/1/test be? - -7) FAQ - - Q1. Why is bind mount needed? How is it different from symbolic links? - symbolic links can get stale if the destination mount gets - unmounted or moved. Bind mounts continue to exist even if the - other mount is unmounted or moved. - - Q2. Why can't the shared subtree be implemented using exportfs? - - exportfs is a heavyweight way of accomplishing part of what - shared subtree can do. I cannot imagine a way to implement the - semantics of slave mount using exportfs? - - Q3 Why is unbindable mount needed? - - Let's say we want to replicate the mount tree at multiple - locations within the same subtree. 
- - if one rbind mounts a tree within the same subtree 'n' times - the number of mounts created is an exponential function of 'n'. - Having unbindable mount can help prune the unneeded bind - mounts. Here is an example. - - step 1: - let's say the root tree has just two directories with - one vfsmount. - root - / \ - tmp usr - - And we want to replicate the tree at multiple - mountpoints under /root/tmp - - step2: - mount --make-shared /root - - mkdir -p /tmp/m1 - - mount --rbind /root /tmp/m1 - - the new tree now looks like this: - - root - / \ - tmp usr - / - m1 - / \ - tmp usr - / - m1 - - it has two vfsmounts - - step3: - mkdir -p /tmp/m2 - mount --rbind /root /tmp/m2 - - the new tree now looks like this: - - root - / \ - tmp usr - / \ - m1 m2 - / \ / \ - tmp usr tmp usr - / \ / - m1 m2 m1 - / \ / \ - tmp usr tmp usr - / / \ - m1 m1 m2 - / \ - tmp usr - / \ - m1 m2 - - it has 6 vfsmounts - - step 4: - mkdir -p /tmp/m3 - mount --rbind /root /tmp/m3 - - I won't draw the tree..but it has 24 vfsmounts - - - at step i the number of vfsmounts is V[i] = i*V[i-1]. - This is an exponential function. And this tree has way more - mounts than what we really needed in the first place. - - One could use a series of umount at each step to prune - out the unneeded mounts. But there is a better solution. - Unclonable mounts come in handy here. - - step 1: - let's say the root tree has just two directories with - one vfsmount. 
- root - / \ - tmp usr - - How do we set up the same tree at multiple locations under - /root/tmp - - step2: - mount --bind /root/tmp /root/tmp - - mount --make-rshared /root - mount --make-unbindable /root/tmp - - mkdir -p /tmp/m1 - - mount --rbind /root /tmp/m1 - - the new tree now looks like this: - - root - / \ - tmp usr - / - m1 - / \ - tmp usr - - step3: - mkdir -p /tmp/m2 - mount --rbind /root /tmp/m2 - - the new tree now looks like this: - - root - / \ - tmp usr - / \ - m1 m2 - / \ / \ - tmp usr tmp usr - - step4: - - mkdir -p /tmp/m3 - mount --rbind /root /tmp/m3 - - the new tree now looks like this: - - root - / \ - tmp usr - / \ \ - m1 m2 m3 - / \ / \ / \ - tmp usr tmp usr tmp usr - -8) Implementation - -8A) Datastructure - - 4 new fields are introduced to struct vfsmount - ->mnt_share - ->mnt_slave_list - ->mnt_slave - ->mnt_master - - ->mnt_share links together all the mount to/from which this vfsmount - send/receives propagation events. - - ->mnt_slave_list links all the mounts to which this vfsmount propagates - to. - - ->mnt_slave links together all the slaves that its master vfsmount - propagates to. - - ->mnt_master points to the master vfsmount from which this vfsmount - receives propagation. - - ->mnt_flags takes two more flags to indicate the propagation status of - the vfsmount. MNT_SHARE indicates that the vfsmount is a shared - vfsmount. MNT_UNCLONABLE indicates that the vfsmount cannot be - replicated. - - All the shared vfsmounts in a peer group form a cyclic list through - ->mnt_share. - - All vfsmounts with the same ->mnt_master form on a cyclic list anchored - in ->mnt_master->mnt_slave_list and going through ->mnt_slave. - - ->mnt_master can point to arbitrary (and possibly different) members - of master peer group. To find all immediate slaves of a peer group - you need to go through _all_ ->mnt_slave_list of its members. 
- Conceptually it's just a single set - distribution among the - individual lists does not affect propagation or the way propagation - tree is modified by operations. - - All vfsmounts in a peer group have the same ->mnt_master. If it is - non-NULL, they form a contiguous (ordered) segment of slave list. - - A example propagation tree looks as shown in the figure below. - [ NOTE: Though it looks like a forest, if we consider all the shared - mounts as a conceptual entity called 'pnode', it becomes a tree] - - - A <--> B <--> C <---> D - /|\ /| |\ - / F G J K H I - / - E<-->K - /|\ - M L N - - In the above figure A,B,C and D all are shared and propagate to each - other. 'A' has got 3 slave mounts 'E' 'F' and 'G' 'C' has got 2 slave - mounts 'J' and 'K' and 'D' has got two slave mounts 'H' and 'I'. - 'E' is also shared with 'K' and they propagate to each other. And - 'K' has 3 slaves 'M', 'L' and 'N' - - A's ->mnt_share links with the ->mnt_share of 'B' 'C' and 'D' - - A's ->mnt_slave_list links with ->mnt_slave of 'E', 'K', 'F' and 'G' - - E's ->mnt_share links with ->mnt_share of K - 'E', 'K', 'F', 'G' have their ->mnt_master point to struct - vfsmount of 'A' - 'M', 'L', 'N' have their ->mnt_master point to struct vfsmount of 'K' - K's ->mnt_slave_list links with ->mnt_slave of 'M', 'L' and 'N' - - C's ->mnt_slave_list links with ->mnt_slave of 'J' and 'K' - J and K's ->mnt_master points to struct vfsmount of C - and finally D's ->mnt_slave_list links with ->mnt_slave of 'H' and 'I' - 'H' and 'I' have their ->mnt_master pointing to struct vfsmount of 'D'. - - - NOTE: The propagation tree is orthogonal to the mount tree. - -8B Locking: - - ->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected - by namespace_sem (exclusive for modifications, shared for reading). - - Normally we have ->mnt_flags modifications serialized by vfsmount_lock. - There are two exceptions: do_add_mount() and clone_mnt(). 
- The former modifies a vfsmount that has not been visible in any shared - data structures yet. - The latter holds namespace_sem and the only references to vfsmount - are in lists that can't be traversed without namespace_sem. - -8C Algorithm: - - The crux of the implementation resides in rbind/move operation. - - The overall algorithm breaks the operation into 3 phases: (look at - attach_recursive_mnt() and propagate_mnt()) - - 1. prepare phase. - 2. commit phases. - 3. abort phases. - - Prepare phase: - - for each mount in the source tree: - a) Create the necessary number of mount trees to - be attached to each of the mounts that receive - propagation from the destination mount. - b) Do not attach any of the trees to its destination. - However note down its ->mnt_parent and ->mnt_mountpoint - c) Link all the new mounts to form a propagation tree that - is identical to the propagation tree of the destination - mount. - - If this phase is successful, there should be 'n' new - propagation trees; where 'n' is the number of mounts in the - source tree. Go to the commit phase - - Also there should be 'm' new mount trees, where 'm' is - the number of mounts to which the destination mount - propagates to. - - if any memory allocations fail, go to the abort phase. - - Commit phase - attach each of the mount trees to their corresponding - destination mounts. - - Abort phase - delete all the newly created trees. - - NOTE: all the propagation related functionality resides in the file - pnode.c - - ------------------------------------------------------------------------- - -version 0.1 (created the initial document, Ram Pai linuxram@us.ibm.com) -version 0.2 (Incorporated comments from Al Viro) -- cgit v1.2.3 From a5a1c349ac48252e871abaecabc158f761e0aa92 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:13 +0200 Subject: docs: filesystems: split spufs.txt into 3 separate files This file has manpages for 3 different things. 
As we'll be converting it to ReST, let's fist split it into their individual components. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3753aa73524f4e1cbf0c19e34f7b322420e0b1c6.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/spufs.txt | 521 ------------------------- Documentation/filesystems/spufs/spu_create.txt | 119 ++++++ Documentation/filesystems/spufs/spu_run.txt | 121 ++++++ Documentation/filesystems/spufs/spufs.txt | 276 +++++++++++++ 4 files changed, 516 insertions(+), 521 deletions(-) delete mode 100644 Documentation/filesystems/spufs.txt create mode 100644 Documentation/filesystems/spufs/spu_create.txt create mode 100644 Documentation/filesystems/spufs/spu_run.txt create mode 100644 Documentation/filesystems/spufs/spufs.txt diff --git a/Documentation/filesystems/spufs.txt b/Documentation/filesystems/spufs.txt deleted file mode 100644 index eb9e3aa63026..000000000000 --- a/Documentation/filesystems/spufs.txt +++ /dev/null @@ -1,521 +0,0 @@ -SPUFS(2) Linux Programmer's Manual SPUFS(2) - - - -NAME - spufs - the SPU file system - - -DESCRIPTION - The SPU file system is used on PowerPC machines that implement the Cell - Broadband Engine Architecture in order to access Synergistic Processor - Units (SPUs). - - The file system provides a name space similar to posix shared memory or - message queues. Users that have write permissions on the file system - can use spu_create(2) to establish SPU contexts in the spufs root. - - Every SPU context is represented by a directory containing a predefined - set of files. These files can be used for manipulating the state of the - logical SPU. Users can change permissions on those files, but not actu- - ally add or remove files. - - -MOUNT OPTIONS - uid= - set the user owning the mount point, the default is 0 (root). - - gid= - set the group owning the mount point, the default is 0 (root). 
- - -FILES - The files in spufs mostly follow the standard behavior for regular sys- - tem calls like read(2) or write(2), but often support only a subset of - the operations supported on regular file systems. This list details the - supported operations and the deviations from the behaviour in the - respective man pages. - - All files that support the read(2) operation also support readv(2) and - all files that support the write(2) operation also support writev(2). - All files support the access(2) and stat(2) family of operations, but - only the st_mode, st_nlink, st_uid and st_gid fields of struct stat - contain reliable information. - - All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera- - tions, but will not be able to grant permissions that contradict the - possible operations, e.g. read access on the wbox file. - - The current set of files is: - - - /mem - the contents of the local storage memory of the SPU. This can be - accessed like a regular shared memory file and contains both code and - data in the address space of the SPU. The possible operations on an - open mem file are: - - read(2), pread(2), write(2), pwrite(2), lseek(2) - These operate as documented, with the exception that seek(2), - write(2) and pwrite(2) are not supported beyond the end of the - file. The file size is the size of the local storage of the SPU, - which normally is 256 kilobytes. - - mmap(2) - Mapping mem into the process address space gives access to the - SPU local storage within the process address space. Only - MAP_SHARED mappings are allowed. - - - /mbox - The first SPU to CPU communication mailbox. This file is read-only and - can be read in units of 32 bits. The file can only be used in non- - blocking mode and it even poll() will not block on it. The possible - operations on an open mbox file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. 
If there is no data available in the mail - box, the return value is set to -1 and errno becomes EAGAIN. - When data has been read successfully, four bytes are placed in - the data buffer and the value four is returned. - - - /ibox - The second SPU to CPU communication mailbox. This file is similar to - the first mailbox file, but can be read in blocking I/O mode, and the - poll family of system calls can be used to wait for it. The possible - operations on an open ibox file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. If there is no data available in the mail - box and the file descriptor has been opened with O_NONBLOCK, the - return value is set to -1 and errno becomes EAGAIN. - - If there is no data available in the mail box and the file - descriptor has been opened without O_NONBLOCK, the call will - block until the SPU writes to its interrupt mailbox channel. - When data has been read successfully, four bytes are placed in - the data buffer and the value four is returned. - - poll(2) - Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever - data is available for reading. - - - /wbox - The CPU to SPU communation mailbox. It is write-only and can be written - in units of 32 bits. If the mailbox is full, write() will block and - poll can be used to wait for it becoming empty again. The possible - operations on an open wbox file are: write(2) If a count smaller than - four is requested, write returns -1 and sets errno to EINVAL. If there - is no space available in the mail box and the file descriptor has been - opened with O_NONBLOCK, the return value is set to -1 and errno becomes - EAGAIN. - - If there is no space available in the mail box and the file descriptor - has been opened without O_NONBLOCK, the call will block until the SPU - reads from its PPE mailbox channel. When data has been read success- - fully, four bytes are placed in the data buffer and the value four is - returned. 
- - poll(2) - Poll on the ibox file returns (POLLOUT | POLLWRNORM) whenever - space is available for writing. - - - /mbox_stat - /ibox_stat - /wbox_stat - Read-only files that contain the length of the current queue, i.e. how - many words can be read from mbox or ibox or how many words can be - written to wbox without blocking. The files can be read only in 4-byte - units and return a big-endian binary integer number. The possible - operations on an open *box_stat file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is placed in - the data buffer, containing the number of elements that can be - read from (for mbox_stat and ibox_stat) or written to (for - wbox_stat) the respective mail box without blocking or resulting - in EAGAIN. - - - /npc - /decr - /decr_status - /spu_tag_mask - /event_mask - /srr0 - Internal registers of the SPU. The representation is an ASCII string - with the numeric value of the next instruction to be executed. These - can be used in read/write mode for debugging, but normal operation of - programs should not rely on them because access to any of them except - npc requires an SPU context save and is therefore very inefficient. - - The contents of these files are: - - npc Next Program Counter - - decr SPU Decrementer - - decr_status Decrementer Status - - spu_tag_mask MFC tag mask for SPU DMA - - event_mask Event mask for SPU interrupts - - srr0 Interrupt Return address register - - - The possible operations on an open npc, decr, decr_status, - spu_tag_mask, event_mask or srr0 file are: - - read(2) - When the count supplied to the read call is shorter than the - required length for the pointer value plus a newline character, - subsequent reads from the same file descriptor will result in - completing the string, regardless of changes to the register by - a running SPU task. 
When a complete string has been read, all - subsequent read operations will return zero bytes and a new file - descriptor needs to be opened to read the value again. - - write(2) - A write operation on the file results in setting the register to - the value given in the string. The string is parsed from the - beginning to the first non-numeric character or the end of the - buffer. Subsequent writes to the same file descriptor overwrite - the previous setting. - - - /fpcr - This file gives access to the Floating Point Status and Control Regis- - ter as a four byte long file. The operations on the fpcr file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is placed in - the data buffer, containing the current value of the fpcr regis- - ter. - - write(2) - If a count smaller than four is requested, write returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is copied - from the data buffer, updating the value of the fpcr register. - - - /signal1 - /signal2 - The two signal notification channels of an SPU. These are read-write - files that operate on a 32 bit word. Writing to one of these files - triggers an interrupt on the SPU. The value written to the signal - files can be read from the SPU through a channel read or from host user - space through the file. After the value has been read by the SPU, it - is reset to zero. The possible operations on an open signal1 or sig- - nal2 file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is placed in - the data buffer, containing the current value of the specified - signal notification register. - - write(2) - If a count smaller than four is requested, write returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is copied - from the data buffer, updating the value of the specified signal - notification register. 
The signal notification register will - either be replaced with the input data or will be updated to the - bitwise OR or the old value and the input data, depending on the - contents of the signal1_type, or signal2_type respectively, - file. - - - /signal1_type - /signal2_type - These two files change the behavior of the signal1 and signal2 notifi- - cation files. The contain a numerical ASCII string which is read as - either "1" or "0". In mode 0 (overwrite), the hardware replaces the - contents of the signal channel with the data that is written to it. in - mode 1 (logical OR), the hardware accumulates the bits that are subse- - quently written to it. The possible operations on an open signal1_type - or signal2_type file are: - - read(2) - When the count supplied to the read call is shorter than the - required length for the digit plus a newline character, subse- - quent reads from the same file descriptor will result in com- - pleting the string. When a complete string has been read, all - subsequent read operations will return zero bytes and a new file - descriptor needs to be opened to read the value again. - - write(2) - A write operation on the file results in setting the register to - the value given in the string. The string is parsed from the - beginning to the first non-numeric character or the end of the - buffer. Subsequent writes to the same file descriptor overwrite - the previous setting. 
- - -EXAMPLES - /etc/fstab entry - none /spu spufs gid=spu 0 0 - - -AUTHORS - Arnd Bergmann , Mark Nutter , - Ulrich Weigand - -SEE ALSO - capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7) - - - -Linux 2005-09-28 SPUFS(2) - ------------------------------------------------------------------------------- - -SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2) - - - -NAME - spu_run - execute an spu context - - -SYNOPSIS - #include - - int spu_run(int fd, unsigned int *npc, unsigned int *event); - -DESCRIPTION - The spu_run system call is used on PowerPC machines that implement the - Cell Broadband Engine Architecture in order to access Synergistic Pro- - cessor Units (SPUs). It uses the fd that was returned from spu_cre- - ate(2) to address a specific SPU context. When the context gets sched- - uled to a physical SPU, it starts execution at the instruction pointer - passed in npc. - - Execution of SPU code happens synchronously, meaning that spu_run does - not return while the SPU is still running. If there is a need to exe- - cute SPU code in parallel with other code on either the main CPU or - other SPUs, you need to create a new thread of execution first, e.g. - using the pthread_create(3) call. - - When spu_run returns, the current value of the SPU instruction pointer - is written back to npc, so you can call spu_run again without updating - the pointers. - - event can be a NULL pointer or point to an extended status code that - gets filled when spu_run returns. It can be one of the following con- - stants: - - SPE_EVENT_DMA_ALIGNMENT - A DMA alignment error - - SPE_EVENT_SPE_DATA_SEGMENT - A DMA segmentation error - - SPE_EVENT_SPE_DATA_STORAGE - A DMA storage error - - If NULL is passed as the event argument, these errors will result in a - signal delivered to the calling process. - -RETURN VALUE - spu_run returns the value of the spu_status register or -1 to indicate - an error and set errno to one of the error codes listed below. 
The - spu_status register value contains a bit mask of status codes and - optionally a 14 bit code returned from the stop-and-signal instruction - on the SPU. The bit masks for the status codes are: - - 0x02 SPU was stopped by stop-and-signal. - - 0x04 SPU was stopped by halt. - - 0x08 SPU is waiting for a channel. - - 0x10 SPU is in single-step mode. - - 0x20 SPU has tried to execute an invalid instruction. - - 0x40 SPU has tried to access an invalid channel. - - 0x3fff0000 - The bits masked with this value contain the code returned from - stop-and-signal. - - There are always one or more of the lower eight bits set or an error - code is returned from spu_run. - -ERRORS - EAGAIN or EWOULDBLOCK - fd is in non-blocking mode and spu_run would block. - - EBADF fd is not a valid file descriptor. - - EFAULT npc is not a valid pointer or status is neither NULL nor a valid - pointer. - - EINTR A signal occurred while spu_run was in progress. The npc value - has been updated to the new program counter value if necessary. - - EINVAL fd is not a file descriptor returned from spu_create(2). - - ENOMEM Insufficient memory was available to handle a page fault result- - ing from an MFC direct memory access. - - ENOSYS the functionality is not provided by the current system, because - either the hardware does not provide SPUs or the spufs module is - not loaded. - - -NOTES - spu_run is meant to be used from libraries that implement a more - abstract interface to SPUs, not to be used from regular applications. - See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- - ommended libraries. - - -CONFORMING TO - This call is Linux specific and only implemented by the ppc64 architec- - ture. Programs using this system call are not portable. - - -BUGS - The code does not yet fully implement all features lined out here. 
- - -AUTHOR - Arnd Bergmann - -SEE ALSO - capabilities(7), close(2), spu_create(2), spufs(7) - - - -Linux 2005-09-28 SPU_RUN(2) - ------------------------------------------------------------------------------- - -SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2) - - - -NAME - spu_create - create a new spu context - - -SYNOPSIS - #include - #include - - int spu_create(const char *pathname, int flags, mode_t mode); - -DESCRIPTION - The spu_create system call is used on PowerPC machines that implement - the Cell Broadband Engine Architecture in order to access Synergistic - Processor Units (SPUs). It creates a new logical context for an SPU in - pathname and returns a handle to associated with it. pathname must - point to a non-existing directory in the mount point of the SPU file - system (spufs). When spu_create is successful, a directory gets cre- - ated on pathname and it is populated with files. - - The returned file handle can only be passed to spu_run(2) or closed, - other operations are not defined on it. When it is closed, all associ- - ated directory entries in spufs are removed. When the last file handle - pointing either inside of the context directory or to this file - descriptor is closed, the logical SPU context is destroyed. - - The parameter flags can be zero or any bitwise or'd combination of the - following constants: - - SPU_RAWIO - Allow mapping of some of the hardware registers of the SPU into - user space. This flag requires the CAP_SYS_RAWIO capability, see - capabilities(7). - - The mode parameter specifies the permissions used for creating the new - directory in spufs. mode is modified with the user's umask(2) value - and then used for both the directory and the files contained in it. The - file permissions mask out some more bits of mode because they typically - support only read or write access. See stat(2) for a full list of the - possible mode values. - - -RETURN VALUE - spu_create returns a new file descriptor. 
It may return -1 to indicate - an error condition and set errno to one of the error codes listed - below. - - -ERRORS - EACCES - The current user does not have write access on the spufs mount - point. - - EEXIST An SPU context already exists at the given path name. - - EFAULT pathname is not a valid string pointer in the current address - space. - - EINVAL pathname is not a directory in the spufs mount point. - - ELOOP Too many symlinks were found while resolving pathname. - - EMFILE The process has reached its maximum open file limit. - - ENAMETOOLONG - pathname was too long. - - ENFILE The system has reached the global open file limit. - - ENOENT Part of pathname could not be resolved. - - ENOMEM The kernel could not allocate all resources required. - - ENOSPC There are not enough SPU resources available to create a new - context or the user specific limit for the number of SPU con- - texts has been reached. - - ENOSYS the functionality is not provided by the current system, because - either the hardware does not provide SPUs or the spufs module is - not loaded. - - ENOTDIR - A part of pathname is not a directory. - - - -NOTES - spu_create is meant to be used from libraries that implement a more - abstract interface to SPUs, not to be used from regular applications. - See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- - ommended libraries. - - -FILES - pathname must point to a location beneath the mount point of spufs. By - convention, it gets mounted in /spu. - - -CONFORMING TO - This call is Linux specific and only implemented by the ppc64 architec- - ture. Programs using this system call are not portable. - - -BUGS - The code does not yet fully implement all features lined out here. 
- - -AUTHOR - Arnd Bergmann - -SEE ALSO - capabilities(7), close(2), spu_run(2), spufs(7) - - - -Linux 2005-09-28 SPU_CREATE(2) diff --git a/Documentation/filesystems/spufs/spu_create.txt b/Documentation/filesystems/spufs/spu_create.txt new file mode 100644 index 000000000000..8ede5a35340f --- /dev/null +++ b/Documentation/filesystems/spufs/spu_create.txt @@ -0,0 +1,119 @@ +SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2) + + + +NAME + spu_create - create a new spu context + + +SYNOPSIS + #include + #include + + int spu_create(const char *pathname, int flags, mode_t mode); + +DESCRIPTION + The spu_create system call is used on PowerPC machines that implement + the Cell Broadband Engine Architecture in order to access Synergistic + Processor Units (SPUs). It creates a new logical context for an SPU in + pathname and returns a handle to associated with it. pathname must + point to a non-existing directory in the mount point of the SPU file + system (spufs). When spu_create is successful, a directory gets cre- + ated on pathname and it is populated with files. + + The returned file handle can only be passed to spu_run(2) or closed, + other operations are not defined on it. When it is closed, all associ- + ated directory entries in spufs are removed. When the last file handle + pointing either inside of the context directory or to this file + descriptor is closed, the logical SPU context is destroyed. + + The parameter flags can be zero or any bitwise or'd combination of the + following constants: + + SPU_RAWIO + Allow mapping of some of the hardware registers of the SPU into + user space. This flag requires the CAP_SYS_RAWIO capability, see + capabilities(7). + + The mode parameter specifies the permissions used for creating the new + directory in spufs. mode is modified with the user's umask(2) value + and then used for both the directory and the files contained in it. 
The + file permissions mask out some more bits of mode because they typically + support only read or write access. See stat(2) for a full list of the + possible mode values. + + +RETURN VALUE + spu_create returns a new file descriptor. It may return -1 to indicate + an error condition and set errno to one of the error codes listed + below. + + +ERRORS + EACCES + The current user does not have write access on the spufs mount + point. + + EEXIST An SPU context already exists at the given path name. + + EFAULT pathname is not a valid string pointer in the current address + space. + + EINVAL pathname is not a directory in the spufs mount point. + + ELOOP Too many symlinks were found while resolving pathname. + + EMFILE The process has reached its maximum open file limit. + + ENAMETOOLONG + pathname was too long. + + ENFILE The system has reached the global open file limit. + + ENOENT Part of pathname could not be resolved. + + ENOMEM The kernel could not allocate all resources required. + + ENOSPC There are not enough SPU resources available to create a new + context or the user specific limit for the number of SPU con- + texts has been reached. + + ENOSYS the functionality is not provided by the current system, because + either the hardware does not provide SPUs or the spufs module is + not loaded. + + ENOTDIR + A part of pathname is not a directory. + + + +NOTES + spu_create is meant to be used from libraries that implement a more + abstract interface to SPUs, not to be used from regular applications. + See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- + ommended libraries. + + +FILES + pathname must point to a location beneath the mount point of spufs. By + convention, it gets mounted in /spu. + + +CONFORMING TO + This call is Linux specific and only implemented by the ppc64 architec- + ture. Programs using this system call are not portable. + + +BUGS + The code does not yet fully implement all features lined out here. 
+ + +AUTHOR + Arnd Bergmann + +SEE ALSO + capabilities(7), close(2), spu_run(2), spufs(7) + + + +Linux 2005-09-28 SPU_CREATE(2) diff --git a/Documentation/filesystems/spufs/spu_run.txt b/Documentation/filesystems/spufs/spu_run.txt new file mode 100644 index 000000000000..d5c6a00d0f97 --- /dev/null +++ b/Documentation/filesystems/spufs/spu_run.txt @@ -0,0 +1,121 @@ +SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2) + + + +NAME + spu_run - execute an spu context + + +SYNOPSIS + #include + + int spu_run(int fd, unsigned int *npc, unsigned int *event); + +DESCRIPTION + The spu_run system call is used on PowerPC machines that implement the + Cell Broadband Engine Architecture in order to access Synergistic Pro- + cessor Units (SPUs). It uses the fd that was returned from spu_cre- + ate(2) to address a specific SPU context. When the context gets sched- + uled to a physical SPU, it starts execution at the instruction pointer + passed in npc. + + Execution of SPU code happens synchronously, meaning that spu_run does + not return while the SPU is still running. If there is a need to exe- + cute SPU code in parallel with other code on either the main CPU or + other SPUs, you need to create a new thread of execution first, e.g. + using the pthread_create(3) call. + + When spu_run returns, the current value of the SPU instruction pointer + is written back to npc, so you can call spu_run again without updating + the pointers. + + event can be a NULL pointer or point to an extended status code that + gets filled when spu_run returns. It can be one of the following con- + stants: + + SPE_EVENT_DMA_ALIGNMENT + A DMA alignment error + + SPE_EVENT_SPE_DATA_SEGMENT + A DMA segmentation error + + SPE_EVENT_SPE_DATA_STORAGE + A DMA storage error + + If NULL is passed as the event argument, these errors will result in a + signal delivered to the calling process. 
+ +RETURN VALUE + spu_run returns the value of the spu_status register or -1 to indicate + an error and set errno to one of the error codes listed below. The + spu_status register value contains a bit mask of status codes and + optionally a 14 bit code returned from the stop-and-signal instruction + on the SPU. The bit masks for the status codes are: + + 0x02 SPU was stopped by stop-and-signal. + + 0x04 SPU was stopped by halt. + + 0x08 SPU is waiting for a channel. + + 0x10 SPU is in single-step mode. + + 0x20 SPU has tried to execute an invalid instruction. + + 0x40 SPU has tried to access an invalid channel. + + 0x3fff0000 + The bits masked with this value contain the code returned from + stop-and-signal. + + There are always one or more of the lower eight bits set or an error + code is returned from spu_run. + +ERRORS + EAGAIN or EWOULDBLOCK + fd is in non-blocking mode and spu_run would block. + + EBADF fd is not a valid file descriptor. + + EFAULT npc is not a valid pointer or status is neither NULL nor a valid + pointer. + + EINTR A signal occurred while spu_run was in progress. The npc value + has been updated to the new program counter value if necessary. + + EINVAL fd is not a file descriptor returned from spu_create(2). + + ENOMEM Insufficient memory was available to handle a page fault result- + ing from an MFC direct memory access. + + ENOSYS the functionality is not provided by the current system, because + either the hardware does not provide SPUs or the spufs module is + not loaded. + + +NOTES + spu_run is meant to be used from libraries that implement a more + abstract interface to SPUs, not to be used from regular applications. + See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- + ommended libraries. + + +CONFORMING TO + This call is Linux specific and only implemented by the ppc64 architec- + ture. Programs using this system call are not portable. + + +BUGS + The code does not yet fully implement all features lined out here. 
+ + +AUTHOR + Arnd Bergmann + +SEE ALSO + capabilities(7), close(2), spu_create(2), spufs(7) + + + + +Linux 2005-09-28 SPU_RUN(2) diff --git a/Documentation/filesystems/spufs/spufs.txt b/Documentation/filesystems/spufs/spufs.txt new file mode 100644 index 000000000000..caf36aaae804 --- /dev/null +++ b/Documentation/filesystems/spufs/spufs.txt @@ -0,0 +1,276 @@ +SPUFS(2) Linux Programmer's Manual SPUFS(2) + + + +NAME + spufs - the SPU file system + + +DESCRIPTION + The SPU file system is used on PowerPC machines that implement the Cell + Broadband Engine Architecture in order to access Synergistic Processor + Units (SPUs). + + The file system provides a name space similar to posix shared memory or + message queues. Users that have write permissions on the file system + can use spu_create(2) to establish SPU contexts in the spufs root. + + Every SPU context is represented by a directory containing a predefined + set of files. These files can be used for manipulating the state of the + logical SPU. Users can change permissions on those files, but not actu- + ally add or remove files. + + +MOUNT OPTIONS + uid= + set the user owning the mount point, the default is 0 (root). + + gid= + set the group owning the mount point, the default is 0 (root). + + +FILES + The files in spufs mostly follow the standard behavior for regular sys- + tem calls like read(2) or write(2), but often support only a subset of + the operations supported on regular file systems. This list details the + supported operations and the deviations from the behaviour in the + respective man pages. + + All files that support the read(2) operation also support readv(2) and + all files that support the write(2) operation also support writev(2). + All files support the access(2) and stat(2) family of operations, but + only the st_mode, st_nlink, st_uid and st_gid fields of struct stat + contain reliable information. 
+ + All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera- + tions, but will not be able to grant permissions that contradict the + possible operations, e.g. read access on the wbox file. + + The current set of files is: + + + /mem + the contents of the local storage memory of the SPU. This can be + accessed like a regular shared memory file and contains both code and + data in the address space of the SPU. The possible operations on an + open mem file are: + + read(2), pread(2), write(2), pwrite(2), lseek(2) + These operate as documented, with the exception that seek(2), + write(2) and pwrite(2) are not supported beyond the end of the + file. The file size is the size of the local storage of the SPU, + which normally is 256 kilobytes. + + mmap(2) + Mapping mem into the process address space gives access to the + SPU local storage within the process address space. Only + MAP_SHARED mappings are allowed. + + + /mbox + The first SPU to CPU communication mailbox. This file is read-only and + can be read in units of 32 bits. The file can only be used in non- + blocking mode and it even poll() will not block on it. The possible + operations on an open mbox file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. If there is no data available in the mail + box, the return value is set to -1 and errno becomes EAGAIN. + When data has been read successfully, four bytes are placed in + the data buffer and the value four is returned. + + + /ibox + The second SPU to CPU communication mailbox. This file is similar to + the first mailbox file, but can be read in blocking I/O mode, and the + poll family of system calls can be used to wait for it. The possible + operations on an open ibox file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. 
If there is no data available in the mail + box and the file descriptor has been opened with O_NONBLOCK, the + return value is set to -1 and errno becomes EAGAIN. + + If there is no data available in the mail box and the file + descriptor has been opened without O_NONBLOCK, the call will + block until the SPU writes to its interrupt mailbox channel. + When data has been read successfully, four bytes are placed in + the data buffer and the value four is returned. + + poll(2) + Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever + data is available for reading. + + + /wbox + The CPU to SPU communation mailbox. It is write-only and can be written + in units of 32 bits. If the mailbox is full, write() will block and + poll can be used to wait for it becoming empty again. The possible + operations on an open wbox file are: write(2) If a count smaller than + four is requested, write returns -1 and sets errno to EINVAL. If there + is no space available in the mail box and the file descriptor has been + opened with O_NONBLOCK, the return value is set to -1 and errno becomes + EAGAIN. + + If there is no space available in the mail box and the file descriptor + has been opened without O_NONBLOCK, the call will block until the SPU + reads from its PPE mailbox channel. When data has been read success- + fully, four bytes are placed in the data buffer and the value four is + returned. + + poll(2) + Poll on the ibox file returns (POLLOUT | POLLWRNORM) whenever + space is available for writing. + + + /mbox_stat + /ibox_stat + /wbox_stat + Read-only files that contain the length of the current queue, i.e. how + many words can be read from mbox or ibox or how many words can be + written to wbox without blocking. The files can be read only in 4-byte + units and return a big-endian binary integer number. The possible + operations on an open *box_stat file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. 
Otherwise, a four byte value is placed in + the data buffer, containing the number of elements that can be + read from (for mbox_stat and ibox_stat) or written to (for + wbox_stat) the respective mail box without blocking or resulting + in EAGAIN. + + + /npc + /decr + /decr_status + /spu_tag_mask + /event_mask + /srr0 + Internal registers of the SPU. The representation is an ASCII string + with the numeric value of the next instruction to be executed. These + can be used in read/write mode for debugging, but normal operation of + programs should not rely on them because access to any of them except + npc requires an SPU context save and is therefore very inefficient. + + The contents of these files are: + + npc Next Program Counter + + decr SPU Decrementer + + decr_status Decrementer Status + + spu_tag_mask MFC tag mask for SPU DMA + + event_mask Event mask for SPU interrupts + + srr0 Interrupt Return address register + + + The possible operations on an open npc, decr, decr_status, + spu_tag_mask, event_mask or srr0 file are: + + read(2) + When the count supplied to the read call is shorter than the + required length for the pointer value plus a newline character, + subsequent reads from the same file descriptor will result in + completing the string, regardless of changes to the register by + a running SPU task. When a complete string has been read, all + subsequent read operations will return zero bytes and a new file + descriptor needs to be opened to read the value again. + + write(2) + A write operation on the file results in setting the register to + the value given in the string. The string is parsed from the + beginning to the first non-numeric character or the end of the + buffer. Subsequent writes to the same file descriptor overwrite + the previous setting. + + + /fpcr + This file gives access to the Floating Point Status and Control Regis- + ter as a four byte long file. 
The operations on the fpcr file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the current value of the fpcr regis- + ter. + + write(2) + If a count smaller than four is requested, write returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is copied + from the data buffer, updating the value of the fpcr register. + + + /signal1 + /signal2 + The two signal notification channels of an SPU. These are read-write + files that operate on a 32 bit word. Writing to one of these files + triggers an interrupt on the SPU. The value written to the signal + files can be read from the SPU through a channel read or from host user + space through the file. After the value has been read by the SPU, it + is reset to zero. The possible operations on an open signal1 or sig- + nal2 file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the current value of the specified + signal notification register. + + write(2) + If a count smaller than four is requested, write returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is copied + from the data buffer, updating the value of the specified signal + notification register. The signal notification register will + either be replaced with the input data or will be updated to the + bitwise OR or the old value and the input data, depending on the + contents of the signal1_type, or signal2_type respectively, + file. + + + /signal1_type + /signal2_type + These two files change the behavior of the signal1 and signal2 notifi- + cation files. The contain a numerical ASCII string which is read as + either "1" or "0". In mode 0 (overwrite), the hardware replaces the + contents of the signal channel with the data that is written to it. 
in + mode 1 (logical OR), the hardware accumulates the bits that are subse- + quently written to it. The possible operations on an open signal1_type + or signal2_type file are: + + read(2) + When the count supplied to the read call is shorter than the + required length for the digit plus a newline character, subse- + quent reads from the same file descriptor will result in com- + pleting the string. When a complete string has been read, all + subsequent read operations will return zero bytes and a new file + descriptor needs to be opened to read the value again. + + write(2) + A write operation on the file results in setting the register to + the value given in the string. The string is parsed from the + beginning to the first non-numeric character or the end of the + buffer. Subsequent writes to the same file descriptor overwrite + the previous setting. + + +EXAMPLES + /etc/fstab entry + none /spu spufs gid=spu 0 0 + + +AUTHORS + Arnd Bergmann , Mark Nutter , + Ulrich Weigand + +SEE ALSO + capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7) + + + +Linux 2005-09-28 SPUFS(2) -- cgit v1.2.3 From dc3f043ff0e44862a237d87f37ca04694d884049 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:14 +0200 Subject: docs: filesystems: convert spufs/spu_create.txt to ReST This file is at groff output format. Manually convert it to ReST format, trying to preserve a similar output after parsed. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/4d42e97d5560a79bd5dd443c592be04f9ae9a757.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/spufs/index.rst | 11 +++ Documentation/filesystems/spufs/spu_create.rst | 131 +++++++++++++++++++++++++ Documentation/filesystems/spufs/spu_create.txt | 119 ---------------------- 4 files changed, 143 insertions(+), 119 deletions(-) create mode 100644 Documentation/filesystems/spufs/index.rst create mode 100644 Documentation/filesystems/spufs/spu_create.rst delete mode 100644 Documentation/filesystems/spufs/spu_create.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index c658ddee40fa..5fb2b2166204 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -105,6 +105,7 @@ Documentation for filesystem implementations. ramfs-rootfs-initramfs relay romfs + spufs/index squashfs sysfs sysv-fs diff --git a/Documentation/filesystems/spufs/index.rst b/Documentation/filesystems/spufs/index.rst new file mode 100644 index 000000000000..39553c6ebefd --- /dev/null +++ b/Documentation/filesystems/spufs/index.rst @@ -0,0 +1,11 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +SPU Filesystem +============== + + +.. toctree:: + :maxdepth: 1 + + spu_create diff --git a/Documentation/filesystems/spufs/spu_create.rst b/Documentation/filesystems/spufs/spu_create.rst new file mode 100644 index 000000000000..83108c099696 --- /dev/null +++ b/Documentation/filesystems/spufs/spu_create.rst @@ -0,0 +1,131 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +========== +spu_create +========== + +Name +==== + spu_create - create a new spu context + + +Synopsis +======== + + :: + + #include + #include + + int spu_create(const char *pathname, int flags, mode_t mode); + +Description +=========== + The spu_create system call is used on PowerPC machines that implement + the Cell Broadband Engine Architecture in order to access Synergistic + Processor Units (SPUs). It creates a new logical context for an SPU in + pathname and returns a handle to associated with it. pathname must + point to a non-existing directory in the mount point of the SPU file + system (spufs). When spu_create is successful, a directory gets cre- + ated on pathname and it is populated with files. + + The returned file handle can only be passed to spu_run(2) or closed, + other operations are not defined on it. When it is closed, all associ- + ated directory entries in spufs are removed. When the last file handle + pointing either inside of the context directory or to this file + descriptor is closed, the logical SPU context is destroyed. + + The parameter flags can be zero or any bitwise or'd combination of the + following constants: + + SPU_RAWIO + Allow mapping of some of the hardware registers of the SPU into + user space. This flag requires the CAP_SYS_RAWIO capability, see + capabilities(7). + + The mode parameter specifies the permissions used for creating the new + directory in spufs. mode is modified with the user's umask(2) value + and then used for both the directory and the files contained in it. The + file permissions mask out some more bits of mode because they typically + support only read or write access. See stat(2) for a full list of the + possible mode values. + + +Return Value +============ + spu_create returns a new file descriptor. It may return -1 to indicate + an error condition and set errno to one of the error codes listed + below. 
+ + +Errors +====== + EACCES + The current user does not have write access on the spufs mount + point. + + EEXIST An SPU context already exists at the given path name. + + EFAULT pathname is not a valid string pointer in the current address + space. + + EINVAL pathname is not a directory in the spufs mount point. + + ELOOP Too many symlinks were found while resolving pathname. + + EMFILE The process has reached its maximum open file limit. + + ENAMETOOLONG + pathname was too long. + + ENFILE The system has reached the global open file limit. + + ENOENT Part of pathname could not be resolved. + + ENOMEM The kernel could not allocate all resources required. + + ENOSPC There are not enough SPU resources available to create a new + context or the user specific limit for the number of SPU con- + texts has been reached. + + ENOSYS the functionality is not provided by the current system, because + either the hardware does not provide SPUs or the spufs module is + not loaded. + + ENOTDIR + A part of pathname is not a directory. + + + +Notes +===== + spu_create is meant to be used from libraries that implement a more + abstract interface to SPUs, not to be used from regular applications. + See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- + ommended libraries. + + +Files +===== + pathname must point to a location beneath the mount point of spufs. By + convention, it gets mounted in /spu. + + +Conforming to +============= + This call is Linux specific and only implemented by the ppc64 architec- + ture. Programs using this system call are not portable. + + +Bugs +==== + The code does not yet fully implement all features lined out here. 
+ + +Author +====== + Arnd Bergmann + +See Also +======== + capabilities(7), close(2), spu_run(2), spufs(7) diff --git a/Documentation/filesystems/spufs/spu_create.txt b/Documentation/filesystems/spufs/spu_create.txt deleted file mode 100644 index 8ede5a35340f..000000000000 --- a/Documentation/filesystems/spufs/spu_create.txt +++ /dev/null @@ -1,119 +0,0 @@ -SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2) - - - -NAME - spu_create - create a new spu context - - -SYNOPSIS - #include - #include - - int spu_create(const char *pathname, int flags, mode_t mode); - -DESCRIPTION - The spu_create system call is used on PowerPC machines that implement - the Cell Broadband Engine Architecture in order to access Synergistic - Processor Units (SPUs). It creates a new logical context for an SPU in - pathname and returns a handle to associated with it. pathname must - point to a non-existing directory in the mount point of the SPU file - system (spufs). When spu_create is successful, a directory gets cre- - ated on pathname and it is populated with files. - - The returned file handle can only be passed to spu_run(2) or closed, - other operations are not defined on it. When it is closed, all associ- - ated directory entries in spufs are removed. When the last file handle - pointing either inside of the context directory or to this file - descriptor is closed, the logical SPU context is destroyed. - - The parameter flags can be zero or any bitwise or'd combination of the - following constants: - - SPU_RAWIO - Allow mapping of some of the hardware registers of the SPU into - user space. This flag requires the CAP_SYS_RAWIO capability, see - capabilities(7). - - The mode parameter specifies the permissions used for creating the new - directory in spufs. mode is modified with the user's umask(2) value - and then used for both the directory and the files contained in it. 
The - file permissions mask out some more bits of mode because they typically - support only read or write access. See stat(2) for a full list of the - possible mode values. - - -RETURN VALUE - spu_create returns a new file descriptor. It may return -1 to indicate - an error condition and set errno to one of the error codes listed - below. - - -ERRORS - EACCES - The current user does not have write access on the spufs mount - point. - - EEXIST An SPU context already exists at the given path name. - - EFAULT pathname is not a valid string pointer in the current address - space. - - EINVAL pathname is not a directory in the spufs mount point. - - ELOOP Too many symlinks were found while resolving pathname. - - EMFILE The process has reached its maximum open file limit. - - ENAMETOOLONG - pathname was too long. - - ENFILE The system has reached the global open file limit. - - ENOENT Part of pathname could not be resolved. - - ENOMEM The kernel could not allocate all resources required. - - ENOSPC There are not enough SPU resources available to create a new - context or the user specific limit for the number of SPU con- - texts has been reached. - - ENOSYS the functionality is not provided by the current system, because - either the hardware does not provide SPUs or the spufs module is - not loaded. - - ENOTDIR - A part of pathname is not a directory. - - - -NOTES - spu_create is meant to be used from libraries that implement a more - abstract interface to SPUs, not to be used from regular applications. - See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- - ommended libraries. - - -FILES - pathname must point to a location beneath the mount point of spufs. By - convention, it gets mounted in /spu. - - -CONFORMING TO - This call is Linux specific and only implemented by the ppc64 architec- - ture. Programs using this system call are not portable. - - -BUGS - The code does not yet fully implement all features lined out here. 
- - -AUTHOR - Arnd Bergmann - -SEE ALSO - capabilities(7), close(2), spu_run(2), spufs(7) - - - -Linux 2005-09-28 SPU_CREATE(2) -- cgit v1.2.3 From 299cd2747c61009f32943217c79f3c89728eb2ac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:15 +0200 Subject: docs: filesystems: convert spufs/spufs.txt to ReST This file is at groff output format. Manually convert it to ReST format, trying to preserve a similar output after parsed. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9ca05fad12390931bc7da0fa2502d1a450a4b87f.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/spufs/index.rst | 1 + Documentation/filesystems/spufs/spufs.rst | 273 +++++++++++++++++++++++++++++ Documentation/filesystems/spufs/spufs.txt | 276 ------------------------------ MAINTAINERS | 2 +- 4 files changed, 275 insertions(+), 277 deletions(-) create mode 100644 Documentation/filesystems/spufs/spufs.rst delete mode 100644 Documentation/filesystems/spufs/spufs.txt diff --git a/Documentation/filesystems/spufs/index.rst b/Documentation/filesystems/spufs/index.rst index 39553c6ebefd..939cf59a7d9e 100644 --- a/Documentation/filesystems/spufs/index.rst +++ b/Documentation/filesystems/spufs/index.rst @@ -8,4 +8,5 @@ SPU Filesystem .. toctree:: :maxdepth: 1 + spufs spu_create diff --git a/Documentation/filesystems/spufs/spufs.rst b/Documentation/filesystems/spufs/spufs.rst new file mode 100644 index 000000000000..8a42859bb100 --- /dev/null +++ b/Documentation/filesystems/spufs/spufs.rst @@ -0,0 +1,273 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +spufs +===== + +Name +==== + + spufs - the SPU file system + + +Description +=========== + + The SPU file system is used on PowerPC machines that implement the Cell + Broadband Engine Architecture in order to access Synergistic Processor + Units (SPUs). + + The file system provides a name space similar to posix shared memory or + message queues. 
Users that have write permissions on the file system + can use spu_create(2) to establish SPU contexts in the spufs root. + + Every SPU context is represented by a directory containing a predefined + set of files. These files can be used for manipulating the state of the + logical SPU. Users can change permissions on those files, but not actu- + ally add or remove files. + + +Mount Options +============= + + uid= + set the user owning the mount point, the default is 0 (root). + + gid= + set the group owning the mount point, the default is 0 (root). + + +Files +===== + + The files in spufs mostly follow the standard behavior for regular sys- + tem calls like read(2) or write(2), but often support only a subset of + the operations supported on regular file systems. This list details the + supported operations and the deviations from the behaviour in the + respective man pages. + + All files that support the read(2) operation also support readv(2) and + all files that support the write(2) operation also support writev(2). + All files support the access(2) and stat(2) family of operations, but + only the st_mode, st_nlink, st_uid and st_gid fields of struct stat + contain reliable information. + + All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera- + tions, but will not be able to grant permissions that contradict the + possible operations, e.g. read access on the wbox file. + + The current set of files is: + + + /mem + the contents of the local storage memory of the SPU. This can be + accessed like a regular shared memory file and contains both code and + data in the address space of the SPU. The possible operations on an + open mem file are: + + read(2), pread(2), write(2), pwrite(2), lseek(2) + These operate as documented, with the exception that seek(2), + write(2) and pwrite(2) are not supported beyond the end of the + file. The file size is the size of the local storage of the SPU, + which normally is 256 kilobytes. 
+ + mmap(2) + Mapping mem into the process address space gives access to the + SPU local storage within the process address space. Only + MAP_SHARED mappings are allowed. + + + /mbox + The first SPU to CPU communication mailbox. This file is read-only and + can be read in units of 32 bits. The file can only be used in non- + blocking mode and it even poll() will not block on it. The possible + operations on an open mbox file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. If there is no data available in the mail + box, the return value is set to -1 and errno becomes EAGAIN. + When data has been read successfully, four bytes are placed in + the data buffer and the value four is returned. + + + /ibox + The second SPU to CPU communication mailbox. This file is similar to + the first mailbox file, but can be read in blocking I/O mode, and the + poll family of system calls can be used to wait for it. The possible + operations on an open ibox file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. If there is no data available in the mail + box and the file descriptor has been opened with O_NONBLOCK, the + return value is set to -1 and errno becomes EAGAIN. + + If there is no data available in the mail box and the file + descriptor has been opened without O_NONBLOCK, the call will + block until the SPU writes to its interrupt mailbox channel. + When data has been read successfully, four bytes are placed in + the data buffer and the value four is returned. + + poll(2) + Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever + data is available for reading. + + + /wbox + The CPU to SPU communation mailbox. It is write-only and can be written + in units of 32 bits. If the mailbox is full, write() will block and + poll can be used to wait for it becoming empty again. 
The possible + operations on an open wbox file are: write(2) If a count smaller than + four is requested, write returns -1 and sets errno to EINVAL. If there + is no space available in the mail box and the file descriptor has been + opened with O_NONBLOCK, the return value is set to -1 and errno becomes + EAGAIN. + + If there is no space available in the mail box and the file descriptor + has been opened without O_NONBLOCK, the call will block until the SPU + reads from its PPE mailbox channel. When data has been read success- + fully, four bytes are placed in the data buffer and the value four is + returned. + + poll(2) + Poll on the ibox file returns (POLLOUT | POLLWRNORM) whenever + space is available for writing. + + + /mbox_stat, /ibox_stat, /wbox_stat + Read-only files that contain the length of the current queue, i.e. how + many words can be read from mbox or ibox or how many words can be + written to wbox without blocking. The files can be read only in 4-byte + units and return a big-endian binary integer number. The possible + operations on an open ``*box_stat`` file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the number of elements that can be + read from (for mbox_stat and ibox_stat) or written to (for + wbox_stat) the respective mail box without blocking or resulting + in EAGAIN. + + + /npc, /decr, /decr_status, /spu_tag_mask, /event_mask, /srr0 + Internal registers of the SPU. The representation is an ASCII string + with the numeric value of the next instruction to be executed. These + can be used in read/write mode for debugging, but normal operation of + programs should not rely on them because access to any of them except + npc requires an SPU context save and is therefore very inefficient. 
+ + The contents of these files are: + + =================== =================================== + npc Next Program Counter + decr SPU Decrementer + decr_status Decrementer Status + spu_tag_mask MFC tag mask for SPU DMA + event_mask Event mask for SPU interrupts + srr0 Interrupt Return address register + =================== =================================== + + + The possible operations on an open npc, decr, decr_status, + spu_tag_mask, event_mask or srr0 file are: + + read(2) + When the count supplied to the read call is shorter than the + required length for the pointer value plus a newline character, + subsequent reads from the same file descriptor will result in + completing the string, regardless of changes to the register by + a running SPU task. When a complete string has been read, all + subsequent read operations will return zero bytes and a new file + descriptor needs to be opened to read the value again. + + write(2) + A write operation on the file results in setting the register to + the value given in the string. The string is parsed from the + beginning to the first non-numeric character or the end of the + buffer. Subsequent writes to the same file descriptor overwrite + the previous setting. + + + /fpcr + This file gives access to the Floating Point Status and Control Regis- + ter as a four byte long file. The operations on the fpcr file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the current value of the fpcr regis- + ter. + + write(2) + If a count smaller than four is requested, write returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is copied + from the data buffer, updating the value of the fpcr register. + + + /signal1, /signal2 + The two signal notification channels of an SPU. These are read-write + files that operate on a 32 bit word. 
Writing to one of these files + triggers an interrupt on the SPU. The value written to the signal + files can be read from the SPU through a channel read or from host user + space through the file. After the value has been read by the SPU, it + is reset to zero. The possible operations on an open signal1 or sig- + nal2 file are: + + read(2) + If a count smaller than four is requested, read returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is placed in + the data buffer, containing the current value of the specified + signal notification register. + + write(2) + If a count smaller than four is requested, write returns -1 and + sets errno to EINVAL. Otherwise, a four byte value is copied + from the data buffer, updating the value of the specified signal + notification register. The signal notification register will + either be replaced with the input data or will be updated to the + bitwise OR or the old value and the input data, depending on the + contents of the signal1_type, or signal2_type respectively, + file. + + + /signal1_type, /signal2_type + These two files change the behavior of the signal1 and signal2 notifi- + cation files. The contain a numerical ASCII string which is read as + either "1" or "0". In mode 0 (overwrite), the hardware replaces the + contents of the signal channel with the data that is written to it. in + mode 1 (logical OR), the hardware accumulates the bits that are subse- + quently written to it. The possible operations on an open signal1_type + or signal2_type file are: + + read(2) + When the count supplied to the read call is shorter than the + required length for the digit plus a newline character, subse- + quent reads from the same file descriptor will result in com- + pleting the string. When a complete string has been read, all + subsequent read operations will return zero bytes and a new file + descriptor needs to be opened to read the value again. 
+ + write(2) + A write operation on the file results in setting the register to + the value given in the string. The string is parsed from the + beginning to the first non-numeric character or the end of the + buffer. Subsequent writes to the same file descriptor overwrite + the previous setting. + + +Examples +======== + /etc/fstab entry + none /spu spufs gid=spu 0 0 + + +Authors +======= + Arnd Bergmann , Mark Nutter , + Ulrich Weigand + +See Also +======== + capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7) diff --git a/Documentation/filesystems/spufs/spufs.txt b/Documentation/filesystems/spufs/spufs.txt deleted file mode 100644 index caf36aaae804..000000000000 --- a/Documentation/filesystems/spufs/spufs.txt +++ /dev/null @@ -1,276 +0,0 @@ -SPUFS(2) Linux Programmer's Manual SPUFS(2) - - - -NAME - spufs - the SPU file system - - -DESCRIPTION - The SPU file system is used on PowerPC machines that implement the Cell - Broadband Engine Architecture in order to access Synergistic Processor - Units (SPUs). - - The file system provides a name space similar to posix shared memory or - message queues. Users that have write permissions on the file system - can use spu_create(2) to establish SPU contexts in the spufs root. - - Every SPU context is represented by a directory containing a predefined - set of files. These files can be used for manipulating the state of the - logical SPU. Users can change permissions on those files, but not actu- - ally add or remove files. - - -MOUNT OPTIONS - uid= - set the user owning the mount point, the default is 0 (root). - - gid= - set the group owning the mount point, the default is 0 (root). - - -FILES - The files in spufs mostly follow the standard behavior for regular sys- - tem calls like read(2) or write(2), but often support only a subset of - the operations supported on regular file systems. This list details the - supported operations and the deviations from the behaviour in the - respective man pages. 
- - All files that support the read(2) operation also support readv(2) and - all files that support the write(2) operation also support writev(2). - All files support the access(2) and stat(2) family of operations, but - only the st_mode, st_nlink, st_uid and st_gid fields of struct stat - contain reliable information. - - All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera- - tions, but will not be able to grant permissions that contradict the - possible operations, e.g. read access on the wbox file. - - The current set of files is: - - - /mem - the contents of the local storage memory of the SPU. This can be - accessed like a regular shared memory file and contains both code and - data in the address space of the SPU. The possible operations on an - open mem file are: - - read(2), pread(2), write(2), pwrite(2), lseek(2) - These operate as documented, with the exception that seek(2), - write(2) and pwrite(2) are not supported beyond the end of the - file. The file size is the size of the local storage of the SPU, - which normally is 256 kilobytes. - - mmap(2) - Mapping mem into the process address space gives access to the - SPU local storage within the process address space. Only - MAP_SHARED mappings are allowed. - - - /mbox - The first SPU to CPU communication mailbox. This file is read-only and - can be read in units of 32 bits. The file can only be used in non- - blocking mode and it even poll() will not block on it. The possible - operations on an open mbox file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. If there is no data available in the mail - box, the return value is set to -1 and errno becomes EAGAIN. - When data has been read successfully, four bytes are placed in - the data buffer and the value four is returned. - - - /ibox - The second SPU to CPU communication mailbox. 
This file is similar to - the first mailbox file, but can be read in blocking I/O mode, and the - poll family of system calls can be used to wait for it. The possible - operations on an open ibox file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. If there is no data available in the mail - box and the file descriptor has been opened with O_NONBLOCK, the - return value is set to -1 and errno becomes EAGAIN. - - If there is no data available in the mail box and the file - descriptor has been opened without O_NONBLOCK, the call will - block until the SPU writes to its interrupt mailbox channel. - When data has been read successfully, four bytes are placed in - the data buffer and the value four is returned. - - poll(2) - Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever - data is available for reading. - - - /wbox - The CPU to SPU communation mailbox. It is write-only and can be written - in units of 32 bits. If the mailbox is full, write() will block and - poll can be used to wait for it becoming empty again. The possible - operations on an open wbox file are: write(2) If a count smaller than - four is requested, write returns -1 and sets errno to EINVAL. If there - is no space available in the mail box and the file descriptor has been - opened with O_NONBLOCK, the return value is set to -1 and errno becomes - EAGAIN. - - If there is no space available in the mail box and the file descriptor - has been opened without O_NONBLOCK, the call will block until the SPU - reads from its PPE mailbox channel. When data has been read success- - fully, four bytes are placed in the data buffer and the value four is - returned. - - poll(2) - Poll on the ibox file returns (POLLOUT | POLLWRNORM) whenever - space is available for writing. - - - /mbox_stat - /ibox_stat - /wbox_stat - Read-only files that contain the length of the current queue, i.e. 
how - many words can be read from mbox or ibox or how many words can be - written to wbox without blocking. The files can be read only in 4-byte - units and return a big-endian binary integer number. The possible - operations on an open *box_stat file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is placed in - the data buffer, containing the number of elements that can be - read from (for mbox_stat and ibox_stat) or written to (for - wbox_stat) the respective mail box without blocking or resulting - in EAGAIN. - - - /npc - /decr - /decr_status - /spu_tag_mask - /event_mask - /srr0 - Internal registers of the SPU. The representation is an ASCII string - with the numeric value of the next instruction to be executed. These - can be used in read/write mode for debugging, but normal operation of - programs should not rely on them because access to any of them except - npc requires an SPU context save and is therefore very inefficient. - - The contents of these files are: - - npc Next Program Counter - - decr SPU Decrementer - - decr_status Decrementer Status - - spu_tag_mask MFC tag mask for SPU DMA - - event_mask Event mask for SPU interrupts - - srr0 Interrupt Return address register - - - The possible operations on an open npc, decr, decr_status, - spu_tag_mask, event_mask or srr0 file are: - - read(2) - When the count supplied to the read call is shorter than the - required length for the pointer value plus a newline character, - subsequent reads from the same file descriptor will result in - completing the string, regardless of changes to the register by - a running SPU task. When a complete string has been read, all - subsequent read operations will return zero bytes and a new file - descriptor needs to be opened to read the value again. - - write(2) - A write operation on the file results in setting the register to - the value given in the string. 
The string is parsed from the - beginning to the first non-numeric character or the end of the - buffer. Subsequent writes to the same file descriptor overwrite - the previous setting. - - - /fpcr - This file gives access to the Floating Point Status and Control Regis- - ter as a four byte long file. The operations on the fpcr file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is placed in - the data buffer, containing the current value of the fpcr regis- - ter. - - write(2) - If a count smaller than four is requested, write returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is copied - from the data buffer, updating the value of the fpcr register. - - - /signal1 - /signal2 - The two signal notification channels of an SPU. These are read-write - files that operate on a 32 bit word. Writing to one of these files - triggers an interrupt on the SPU. The value written to the signal - files can be read from the SPU through a channel read or from host user - space through the file. After the value has been read by the SPU, it - is reset to zero. The possible operations on an open signal1 or sig- - nal2 file are: - - read(2) - If a count smaller than four is requested, read returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is placed in - the data buffer, containing the current value of the specified - signal notification register. - - write(2) - If a count smaller than four is requested, write returns -1 and - sets errno to EINVAL. Otherwise, a four byte value is copied - from the data buffer, updating the value of the specified signal - notification register. The signal notification register will - either be replaced with the input data or will be updated to the - bitwise OR or the old value and the input data, depending on the - contents of the signal1_type, or signal2_type respectively, - file. 
- - - /signal1_type - /signal2_type - These two files change the behavior of the signal1 and signal2 notifi- - cation files. The contain a numerical ASCII string which is read as - either "1" or "0". In mode 0 (overwrite), the hardware replaces the - contents of the signal channel with the data that is written to it. in - mode 1 (logical OR), the hardware accumulates the bits that are subse- - quently written to it. The possible operations on an open signal1_type - or signal2_type file are: - - read(2) - When the count supplied to the read call is shorter than the - required length for the digit plus a newline character, subse- - quent reads from the same file descriptor will result in com- - pleting the string. When a complete string has been read, all - subsequent read operations will return zero bytes and a new file - descriptor needs to be opened to read the value again. - - write(2) - A write operation on the file results in setting the register to - the value given in the string. The string is parsed from the - beginning to the first non-numeric character or the end of the - buffer. Subsequent writes to the same file descriptor overwrite - the previous setting. 
- - -EXAMPLES - /etc/fstab entry - none /spu spufs gid=spu 0 0 - - -AUTHORS - Arnd Bergmann , Mark Nutter , - Ulrich Weigand - -SEE ALSO - capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7) - - - -Linux 2005-09-28 SPUFS(2) diff --git a/MAINTAINERS b/MAINTAINERS index 1c3bd61ad03e..bbf85128ca89 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15882,7 +15882,7 @@ M: Jeremy Kerr L: linuxppc-dev@lists.ozlabs.org S: Supported W: http://www.ibm.com/developerworks/power/cell/ -F: Documentation/filesystems/spufs.txt +F: Documentation/filesystems/spufs/spufs.rst F: arch/powerpc/platforms/cell/spufs/ SQUASHFS FILE SYSTEM -- cgit v1.2.3 From e2975d7ca8df8775409c948310bc0f39678d1612 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:16 +0200 Subject: docs: filesystems: convert spufs/spu_run.txt to ReST This file is at groff output format. Manually convert it to ReST format, trying to preserve a similar output after parsed. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7d8ee1edf5ef0137009bc65ff0441826ce555895.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/spufs/index.rst | 1 + Documentation/filesystems/spufs/spu_run.rst | 138 ++++++++++++++++++++++++++++ Documentation/filesystems/spufs/spu_run.txt | 121 ------------------------ 3 files changed, 139 insertions(+), 121 deletions(-) create mode 100644 Documentation/filesystems/spufs/spu_run.rst delete mode 100644 Documentation/filesystems/spufs/spu_run.txt diff --git a/Documentation/filesystems/spufs/index.rst b/Documentation/filesystems/spufs/index.rst index 939cf59a7d9e..5ed4a8494967 100644 --- a/Documentation/filesystems/spufs/index.rst +++ b/Documentation/filesystems/spufs/index.rst @@ -10,3 +10,4 @@ SPU Filesystem spufs spu_create + spu_run diff --git a/Documentation/filesystems/spufs/spu_run.rst b/Documentation/filesystems/spufs/spu_run.rst new file mode 100644 index 000000000000..7fdb1c31cb91 --- /dev/null 
+++ b/Documentation/filesystems/spufs/spu_run.rst @@ -0,0 +1,138 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======= +spu_run +======= + + +Name +==== + spu_run - execute an spu context + + +Synopsis +======== + + :: + + #include + + int spu_run(int fd, unsigned int *npc, unsigned int *event); + +Description +=========== + The spu_run system call is used on PowerPC machines that implement the + Cell Broadband Engine Architecture in order to access Synergistic Pro- + cessor Units (SPUs). It uses the fd that was returned from spu_cre- + ate(2) to address a specific SPU context. When the context gets sched- + uled to a physical SPU, it starts execution at the instruction pointer + passed in npc. + + Execution of SPU code happens synchronously, meaning that spu_run does + not return while the SPU is still running. If there is a need to exe- + cute SPU code in parallel with other code on either the main CPU or + other SPUs, you need to create a new thread of execution first, e.g. + using the pthread_create(3) call. + + When spu_run returns, the current value of the SPU instruction pointer + is written back to npc, so you can call spu_run again without updating + the pointers. + + event can be a NULL pointer or point to an extended status code that + gets filled when spu_run returns. It can be one of the following con- + stants: + + SPE_EVENT_DMA_ALIGNMENT + A DMA alignment error + + SPE_EVENT_SPE_DATA_SEGMENT + A DMA segmentation error + + SPE_EVENT_SPE_DATA_STORAGE + A DMA storage error + + If NULL is passed as the event argument, these errors will result in a + signal delivered to the calling process. + +Return Value +============ + spu_run returns the value of the spu_status register or -1 to indicate + an error and set errno to one of the error codes listed below. The + spu_status register value contains a bit mask of status codes and + optionally a 14 bit code returned from the stop-and-signal instruction + on the SPU. 
The bit masks for the status codes are: + + 0x02 + SPU was stopped by stop-and-signal. + + 0x04 + SPU was stopped by halt. + + 0x08 + SPU is waiting for a channel. + + 0x10 + SPU is in single-step mode. + + 0x20 + SPU has tried to execute an invalid instruction. + + 0x40 + SPU has tried to access an invalid channel. + + 0x3fff0000 + The bits masked with this value contain the code returned from + stop-and-signal. + + There are always one or more of the lower eight bits set or an error + code is returned from spu_run. + +Errors +====== + EAGAIN or EWOULDBLOCK + fd is in non-blocking mode and spu_run would block. + + EBADF fd is not a valid file descriptor. + + EFAULT npc is not a valid pointer or status is neither NULL nor a valid + pointer. + + EINTR A signal occurred while spu_run was in progress. The npc value + has been updated to the new program counter value if necessary. + + EINVAL fd is not a file descriptor returned from spu_create(2). + + ENOMEM Insufficient memory was available to handle a page fault result- + ing from an MFC direct memory access. + + ENOSYS the functionality is not provided by the current system, because + either the hardware does not provide SPUs or the spufs module is + not loaded. + + +Notes +===== + spu_run is meant to be used from libraries that implement a more + abstract interface to SPUs, not to be used from regular applications. + See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- + ommended libraries. + + +Conforming to +============= + This call is Linux specific and only implemented by the ppc64 architec- + ture. Programs using this system call are not portable. + + +Bugs +==== + The code does not yet fully implement all features lined out here. 
+ + +Author +====== + Arnd Bergmann + +See Also +======== + capabilities(7), close(2), spu_create(2), spufs(7) diff --git a/Documentation/filesystems/spufs/spu_run.txt b/Documentation/filesystems/spufs/spu_run.txt deleted file mode 100644 index d5c6a00d0f97..000000000000 --- a/Documentation/filesystems/spufs/spu_run.txt +++ /dev/null @@ -1,121 +0,0 @@ -SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2) - - - -NAME - spu_run - execute an spu context - - -SYNOPSIS - #include - - int spu_run(int fd, unsigned int *npc, unsigned int *event); - -DESCRIPTION - The spu_run system call is used on PowerPC machines that implement the - Cell Broadband Engine Architecture in order to access Synergistic Pro- - cessor Units (SPUs). It uses the fd that was returned from spu_cre- - ate(2) to address a specific SPU context. When the context gets sched- - uled to a physical SPU, it starts execution at the instruction pointer - passed in npc. - - Execution of SPU code happens synchronously, meaning that spu_run does - not return while the SPU is still running. If there is a need to exe- - cute SPU code in parallel with other code on either the main CPU or - other SPUs, you need to create a new thread of execution first, e.g. - using the pthread_create(3) call. - - When spu_run returns, the current value of the SPU instruction pointer - is written back to npc, so you can call spu_run again without updating - the pointers. - - event can be a NULL pointer or point to an extended status code that - gets filled when spu_run returns. It can be one of the following con- - stants: - - SPE_EVENT_DMA_ALIGNMENT - A DMA alignment error - - SPE_EVENT_SPE_DATA_SEGMENT - A DMA segmentation error - - SPE_EVENT_SPE_DATA_STORAGE - A DMA storage error - - If NULL is passed as the event argument, these errors will result in a - signal delivered to the calling process. 
- -RETURN VALUE - spu_run returns the value of the spu_status register or -1 to indicate - an error and set errno to one of the error codes listed below. The - spu_status register value contains a bit mask of status codes and - optionally a 14 bit code returned from the stop-and-signal instruction - on the SPU. The bit masks for the status codes are: - - 0x02 SPU was stopped by stop-and-signal. - - 0x04 SPU was stopped by halt. - - 0x08 SPU is waiting for a channel. - - 0x10 SPU is in single-step mode. - - 0x20 SPU has tried to execute an invalid instruction. - - 0x40 SPU has tried to access an invalid channel. - - 0x3fff0000 - The bits masked with this value contain the code returned from - stop-and-signal. - - There are always one or more of the lower eight bits set or an error - code is returned from spu_run. - -ERRORS - EAGAIN or EWOULDBLOCK - fd is in non-blocking mode and spu_run would block. - - EBADF fd is not a valid file descriptor. - - EFAULT npc is not a valid pointer or status is neither NULL nor a valid - pointer. - - EINTR A signal occurred while spu_run was in progress. The npc value - has been updated to the new program counter value if necessary. - - EINVAL fd is not a file descriptor returned from spu_create(2). - - ENOMEM Insufficient memory was available to handle a page fault result- - ing from an MFC direct memory access. - - ENOSYS the functionality is not provided by the current system, because - either the hardware does not provide SPUs or the spufs module is - not loaded. - - -NOTES - spu_run is meant to be used from libraries that implement a more - abstract interface to SPUs, not to be used from regular applications. - See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec- - ommended libraries. - - -CONFORMING TO - This call is Linux specific and only implemented by the ppc64 architec- - ture. Programs using this system call are not portable. - - -BUGS - The code does not yet fully implement all features lined out here. 
- - -AUTHOR - Arnd Bergmann - -SEE ALSO - capabilities(7), close(2), spu_create(2), spufs(7) - - - - -Linux 2005-09-28 SPU_RUN(2) -- cgit v1.2.3 From 28bcadf0ae994fd3584f43c39b6915ee70b59749 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:17 +0200 Subject: docs: filesystems: convert sysfs-pci.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/2a9d307753c97d1a843341a2ef1993d43a407ded.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/sysfs-pci.rst | 138 ++++++++++++++++++++++++++++++++ Documentation/filesystems/sysfs-pci.txt | 131 ------------------------------ 3 files changed, 139 insertions(+), 131 deletions(-) create mode 100644 Documentation/filesystems/sysfs-pci.rst delete mode 100644 Documentation/filesystems/sysfs-pci.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 5fb2b2166204..7afb58dff43b 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -34,6 +34,7 @@ algorithms work. quota seq_file sharedsubtree + sysfs-pci automount-support diff --git a/Documentation/filesystems/sysfs-pci.rst b/Documentation/filesystems/sysfs-pci.rst new file mode 100644 index 000000000000..a265f3e2cc80 --- /dev/null +++ b/Documentation/filesystems/sysfs-pci.rst @@ -0,0 +1,138 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================ +Accessing PCI device resources through sysfs +============================================ + +sysfs, usually mounted at /sys, provides access to PCI resources on platforms +that support it. 
For example, a given bus might look like this:: + + /sys/devices/pci0000:17 + |-- 0000:17:00.0 + | |-- class + | |-- config + | |-- device + | |-- enable + | |-- irq + | |-- local_cpus + | |-- remove + | |-- resource + | |-- resource0 + | |-- resource1 + | |-- resource2 + | |-- revision + | |-- rom + | |-- subsystem_device + | |-- subsystem_vendor + | `-- vendor + `-- ... + +The topmost element describes the PCI domain and bus number. In this case, +the domain number is 0000 and the bus number is 17 (both values are in hex). +This bus contains a single function device in slot 0. The domain and bus +numbers are reproduced for convenience. Under the device directory are several +files, each with their own function. + + =================== ===================================================== + file function + =================== ===================================================== + class PCI class (ascii, ro) + config PCI config space (binary, rw) + device PCI device (ascii, ro) + enable Whether the device is enabled (ascii, rw) + irq IRQ number (ascii, ro) + local_cpus nearby CPU mask (cpumask, ro) + remove remove device from kernel's list (ascii, wo) + resource PCI resource host addresses (ascii, ro) + resource0..N PCI resource N, if present (binary, mmap, rw\ [1]_) + resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) + revision PCI revision (ascii, ro) + rom PCI ROM resource, if present (binary, ro) + subsystem_device PCI subsystem device (ascii, ro) + subsystem_vendor PCI subsystem vendor (ascii, ro) + vendor PCI vendor (ascii, ro) + =================== ===================================================== + +:: + + ro - read only file + rw - file is readable and writable + wo - write only file + mmap - file is mmapable + ascii - file contains ascii text + binary - file contains binary data + cpumask - file contains a cpumask type + +.. 
[1] rw for RESOURCE_IO (I/O port) regions only + +The read only files are informational, writes to them will be ignored, with +the exception of the 'rom' file. Writable files can be used to perform +actions on the device (e.g. changing config space, detaching a device). +mmapable files are available via an mmap of the file at offset 0 and can be +used to do actual device programming from userspace. Note that some platforms +don't support mmapping of certain resources, so be sure to check the return +value from any attempted mmap. The most notable of these are I/O port +resources, which also provide read/write access. + +The 'enable' file provides a counter that indicates how many times the device +has been enabled. If the 'enable' file currently returns '4', and a '1' is +echoed into it, it will then return '5'. Echoing a '0' into it will decrease +the count. Even when it returns to 0, though, some of the initialisation +may not be reversed. + +The 'rom' file is special in that it provides read-only access to the device's +ROM file, if available. It's disabled by default, however, so applications +should write the string "1" to the file to enable it before attempting a read +call, and disable it following the access by writing "0" to the file. Note +that the device must be enabled for a rom read to return data successfully. +In the event a driver is not bound to the device, it can be enabled using the +'enable' file, documented above. + +The 'remove' file is used to remove the PCI device, by writing a non-zero +integer to the file. This does not involve any kind of hot-plug functionality, +e.g. powering off the device. The device is removed from the kernel's list of +PCI devices, the sysfs directory for it is removed, and the device will be +removed from any drivers attached to it. Removal of PCI root buses is +disallowed. 
+ +Accessing legacy resources through sysfs +---------------------------------------- + +Legacy I/O port and ISA memory resources are also provided in sysfs if the +underlying platform supports them. They're located in the PCI class hierarchy, +e.g.:: + + /sys/class/pci_bus/0000:17/ + |-- bridge -> ../../../devices/pci0000:17 + |-- cpuaffinity + |-- legacy_io + `-- legacy_mem + +The legacy_io file is a read/write file that can be used by applications to +do legacy port I/O. The application should open the file, seek to the desired +port (e.g. 0x3e8) and do a read or a write of 1, 2 or 4 bytes. The legacy_mem +file should be mmapped with an offset corresponding to the memory offset +desired, e.g. 0xa0000 for the VGA frame buffer. The application can then +simply dereference the returned pointer (after checking for errors of course) +to access legacy memory space. + +Supporting PCI access on new platforms +-------------------------------------- + +In order to support PCI resource mapping as described above, Linux platform +code should ideally define ARCH_GENERIC_PCI_MMAP_RESOURCE and use the generic +implementation of that functionality. To support the historical interface of +mmap() through files in /proc/bus/pci, platforms may also set HAVE_PCI_MMAP. + +Alternatively, platforms which set HAVE_PCI_MMAP may provide their own +implementation of pci_mmap_page_range() instead of defining +ARCH_GENERIC_PCI_MMAP_RESOURCE. + +Platforms which support write-combining maps of PCI resources must define +arch_can_pci_mmap_wc() which shall evaluate to non-zero at runtime when +write-combining is permitted. Platforms which support maps of I/O resources +define arch_can_pci_mmap_io() similarly. + +Legacy resources are protected by the HAVE_PCI_LEGACY define. Platforms +wishing to support legacy functionality should define it and provide +pci_legacy_read, pci_legacy_write and pci_mmap_legacy_page_range functions. 
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt deleted file mode 100644 index 06f1d64c6f70..000000000000 --- a/Documentation/filesystems/sysfs-pci.txt +++ /dev/null @@ -1,131 +0,0 @@ -Accessing PCI device resources through sysfs --------------------------------------------- - -sysfs, usually mounted at /sys, provides access to PCI resources on platforms -that support it. For example, a given bus might look like this: - - /sys/devices/pci0000:17 - |-- 0000:17:00.0 - | |-- class - | |-- config - | |-- device - | |-- enable - | |-- irq - | |-- local_cpus - | |-- remove - | |-- resource - | |-- resource0 - | |-- resource1 - | |-- resource2 - | |-- revision - | |-- rom - | |-- subsystem_device - | |-- subsystem_vendor - | `-- vendor - `-- ... - -The topmost element describes the PCI domain and bus number. In this case, -the domain number is 0000 and the bus number is 17 (both values are in hex). -This bus contains a single function device in slot 0. The domain and bus -numbers are reproduced for convenience. Under the device directory are several -files, each with their own function. 
- - file function - ---- -------- - class PCI class (ascii, ro) - config PCI config space (binary, rw) - device PCI device (ascii, ro) - enable Whether the device is enabled (ascii, rw) - irq IRQ number (ascii, ro) - local_cpus nearby CPU mask (cpumask, ro) - remove remove device from kernel's list (ascii, wo) - resource PCI resource host addresses (ascii, ro) - resource0..N PCI resource N, if present (binary, mmap, rw[1]) - resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) - revision PCI revision (ascii, ro) - rom PCI ROM resource, if present (binary, ro) - subsystem_device PCI subsystem device (ascii, ro) - subsystem_vendor PCI subsystem vendor (ascii, ro) - vendor PCI vendor (ascii, ro) - - ro - read only file - rw - file is readable and writable - wo - write only file - mmap - file is mmapable - ascii - file contains ascii text - binary - file contains binary data - cpumask - file contains a cpumask type - -[1] rw for RESOURCE_IO (I/O port) regions only - -The read only files are informational, writes to them will be ignored, with -the exception of the 'rom' file. Writable files can be used to perform -actions on the device (e.g. changing config space, detaching a device). -mmapable files are available via an mmap of the file at offset 0 and can be -used to do actual device programming from userspace. Note that some platforms -don't support mmapping of certain resources, so be sure to check the return -value from any attempted mmap. The most notable of these are I/O port -resources, which also provide read/write access. - -The 'enable' file provides a counter that indicates how many times the device -has been enabled. If the 'enable' file currently returns '4', and a '1' is -echoed into it, it will then return '5'. Echoing a '0' into it will decrease -the count. Even when it returns to 0, though, some of the initialisation -may not be reversed. 
- -The 'rom' file is special in that it provides read-only access to the device's -ROM file, if available. It's disabled by default, however, so applications -should write the string "1" to the file to enable it before attempting a read -call, and disable it following the access by writing "0" to the file. Note -that the device must be enabled for a rom read to return data successfully. -In the event a driver is not bound to the device, it can be enabled using the -'enable' file, documented above. - -The 'remove' file is used to remove the PCI device, by writing a non-zero -integer to the file. This does not involve any kind of hot-plug functionality, -e.g. powering off the device. The device is removed from the kernel's list of -PCI devices, the sysfs directory for it is removed, and the device will be -removed from any drivers attached to it. Removal of PCI root buses is -disallowed. - -Accessing legacy resources through sysfs ----------------------------------------- - -Legacy I/O port and ISA memory resources are also provided in sysfs if the -underlying platform supports them. They're located in the PCI class hierarchy, -e.g. - - /sys/class/pci_bus/0000:17/ - |-- bridge -> ../../../devices/pci0000:17 - |-- cpuaffinity - |-- legacy_io - `-- legacy_mem - -The legacy_io file is a read/write file that can be used by applications to -do legacy port I/O. The application should open the file, seek to the desired -port (e.g. 0x3e8) and do a read or a write of 1, 2 or 4 bytes. The legacy_mem -file should be mmapped with an offset corresponding to the memory offset -desired, e.g. 0xa0000 for the VGA frame buffer. The application can then -simply dereference the returned pointer (after checking for errors of course) -to access legacy memory space. 
- -Supporting PCI access on new platforms --------------------------------------- - -In order to support PCI resource mapping as described above, Linux platform -code should ideally define ARCH_GENERIC_PCI_MMAP_RESOURCE and use the generic -implementation of that functionality. To support the historical interface of -mmap() through files in /proc/bus/pci, platforms may also set HAVE_PCI_MMAP. - -Alternatively, platforms which set HAVE_PCI_MMAP may provide their own -implementation of pci_mmap_page_range() instead of defining -ARCH_GENERIC_PCI_MMAP_RESOURCE. - -Platforms which support write-combining maps of PCI resources must define -arch_can_pci_mmap_wc() which shall evaluate to non-zero at runtime when -write-combining is permitted. Platforms which support maps of I/O resources -define arch_can_pci_mmap_io() similarly. - -Legacy resources are protected by the HAVE_PCI_LEGACY define. Platforms -wishing to support legacy functionality should define it and provide -pci_legacy_read, pci_legacy_write and pci_mmap_legacy_page_range functions. -- cgit v1.2.3 From ec4551f45036d23a34d4f13ab3ce2ea345a13de7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:18 +0200 Subject: docs: filesystems: convert sysfs-tagging.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/45a01fa5edd5c6ee8fc0754fc74f7ef65a3e5581.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/sysfs-tagging.rst | 48 +++++++++++++++++++++++++++++ Documentation/filesystems/sysfs-tagging.txt | 42 ------------------------- 3 files changed, 49 insertions(+), 42 deletions(-) create mode 100644 Documentation/filesystems/sysfs-tagging.rst delete mode 100644 Documentation/filesystems/sysfs-tagging.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 7afb58dff43b..14e298d84c8a 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -35,6 +35,7 @@ algorithms work. seq_file sharedsubtree sysfs-pci + sysfs-tagging automount-support diff --git a/Documentation/filesystems/sysfs-tagging.rst b/Documentation/filesystems/sysfs-tagging.rst new file mode 100644 index 000000000000..8888a05c398e --- /dev/null +++ b/Documentation/filesystems/sysfs-tagging.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Sysfs tagging +============= + +(Taken almost verbatim from Eric Biederman's netns tagging patch +commit msg) + +The problem. Network devices show up in sysfs and with the network +namespace active multiple devices with the same name can show up in +the same directory, ouch! + +To avoid that problem and allow existing applications in network +namespaces to see the same interface that is currently presented in +sysfs, sysfs now has tagging directory support. + +By using the network namespace pointers as tags to separate out the +the sysfs directory entries we ensure that we don't have conflicts +in the directories and applications only see a limited set of +the network devices. + +Each sysfs directory entry may be tagged with a namespace via the +``void *ns member`` of its ``kernfs_node``. 
If a directory entry is tagged, +then ``kernfs_node->flags`` will have a flag between KOBJ_NS_TYPE_NONE +and KOBJ_NS_TYPES, and ns will point to the namespace to which it +belongs. + +Each sysfs superblock's kernfs_super_info contains an array +``void *ns[KOBJ_NS_TYPES]``. When a task in a tagging namespace +kobj_nstype first mounts sysfs, a new superblock is created. It +will be differentiated from other sysfs mounts by having its +``s_fs_info->ns[kobj_nstype]`` set to the new namespace. Note that +through bind mounting and mounts propagation, a task can easily view +the contents of other namespaces' sysfs mounts. Therefore, when a +namespace exits, it will call kobj_ns_exit() to invalidate any +kernfs_node->ns pointers pointing to it. + +Users of this interface: + +- define a type in the ``kobj_ns_type`` enumeration. +- call kobj_ns_type_register() with its ``kobj_ns_type_operations`` which has + + - current_ns() which returns current's namespace + - netlink_ns() which returns a socket's namespace + - initial_ns() which returns the initial namesapce + +- call kobj_ns_exit() when an individual tag is no longer valid diff --git a/Documentation/filesystems/sysfs-tagging.txt b/Documentation/filesystems/sysfs-tagging.txt deleted file mode 100644 index c7c8e6438958..000000000000 --- a/Documentation/filesystems/sysfs-tagging.txt +++ /dev/null @@ -1,42 +0,0 @@ -Sysfs tagging -------------- - -(Taken almost verbatim from Eric Biederman's netns tagging patch -commit msg) - -The problem. Network devices show up in sysfs and with the network -namespace active multiple devices with the same name can show up in -the same directory, ouch! - -To avoid that problem and allow existing applications in network -namespaces to see the same interface that is currently presented in -sysfs, sysfs now has tagging directory support. 
- -By using the network namespace pointers as tags to separate out the -the sysfs directory entries we ensure that we don't have conflicts -in the directories and applications only see a limited set of -the network devices. - -Each sysfs directory entry may be tagged with a namespace via the -void *ns member of its kernfs_node. If a directory entry is tagged, -then kernfs_node->flags will have a flag between KOBJ_NS_TYPE_NONE -and KOBJ_NS_TYPES, and ns will point to the namespace to which it -belongs. - -Each sysfs superblock's kernfs_super_info contains an array void -*ns[KOBJ_NS_TYPES]. When a task in a tagging namespace -kobj_nstype first mounts sysfs, a new superblock is created. It -will be differentiated from other sysfs mounts by having its -s_fs_info->ns[kobj_nstype] set to the new namespace. Note that -through bind mounting and mounts propagation, a task can easily view -the contents of other namespaces' sysfs mounts. Therefore, when a -namespace exits, it will call kobj_ns_exit() to invalidate any -kernfs_node->ns pointers pointing to it. - -Users of this interface: -- define a type in the kobj_ns_type enumeration. -- call kobj_ns_type_register() with its kobj_ns_type_operations which has - - current_ns() which returns current's namespace - - netlink_ns() which returns a socket's namespace - - initial_ns() which returns the initial namesapce -- call kobj_ns_exit() when an individual tag is no longer valid -- cgit v1.2.3 From c3d2f6cb4c707736e19eee4340357f2d8286b5c8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:19 +0200 Subject: docs: filesystems: convert xfs-delayed-logging-design.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/2233c248f12e7b465cd27ee30a86f96eb632946a.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + .../filesystems/xfs-delayed-logging-design.rst | 804 +++++++++++++++++++++ .../filesystems/xfs-delayed-logging-design.txt | 793 -------------------- MAINTAINERS | 2 +- 4 files changed, 806 insertions(+), 794 deletions(-) create mode 100644 Documentation/filesystems/xfs-delayed-logging-design.rst delete mode 100644 Documentation/filesystems/xfs-delayed-logging-design.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 14e298d84c8a..0fd90f854b33 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -117,4 +117,5 @@ Documentation for filesystem implementations. udf virtiofs vfat + xfs-delayed-logging-design zonefs diff --git a/Documentation/filesystems/xfs-delayed-logging-design.rst b/Documentation/filesystems/xfs-delayed-logging-design.rst new file mode 100644 index 000000000000..464405d2801e --- /dev/null +++ b/Documentation/filesystems/xfs-delayed-logging-design.rst @@ -0,0 +1,804 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================== +XFS Delayed Logging Design +========================== + +Introduction to Re-logging in XFS +================================= + +XFS logging is a combination of logical and physical logging. Some objects, +such as inodes and dquots, are logged in logical format where the details +logged are made up of the changes to in-core structures rather than on-disk +structures. Other objects - typically buffers - have their physical changes +logged. The reason for these differences is to reduce the amount of log space +required for objects that are frequently logged. 
Some parts of inodes are more +frequently logged than others, and inodes are typically more frequently logged +than any other object (except maybe the superblock buffer) so keeping the +amount of metadata logged low is of prime importance. + +The reason that this is such a concern is that XFS allows multiple separate +modifications to a single object to be carried in the log at any given time. +This allows the log to avoid needing to flush each change to disk before +recording a new change to the object. XFS does this via a method called +"re-logging". Conceptually, this is quite simple - all it requires is that any +new change to the object is recorded with a *new copy* of all the existing +changes in the new transaction that is written to the log. + +That is, if we have a sequence of changes A through to F, and the object was +written to disk after change D, we would see in the log the following series +of transactions, their contents and the log sequence number (LSN) of the +transaction:: + + Transaction Contents LSN + A A X + B A+B X+n + C A+B+C X+n+m + D A+B+C+D X+n+m+o + + E E Y (> X+n+m+o) + F E+F Y+p + +In other words, each time an object is relogged, the new transaction contains +the aggregation of all the previous changes currently held only in the log. + +This relogging technique also allows objects to be moved forward in the log so +that an object being relogged does not prevent the tail of the log from ever +moving forward. This can be seen in the table above by the changing +(increasing) LSN of each subsequent transaction - the LSN is effectively a +direct encoding of the location in the log of the transaction. + +This relogging is also used to implement long-running, multiple-commit +transactions. These transaction are known as rolling transactions, and require +a special log reservation known as a permanent transaction reservation. 
A +typical example of a rolling transaction is the removal of extents from an +inode which can only be done at a rate of two extents per transaction because +of reservation size limitations. Hence a rolling extent removal transaction +keeps relogging the inode and btree buffers as they get modified in each +removal operation. This keeps them moving forward in the log as the operation +progresses, ensuring that current operation never gets blocked by itself if the +log wraps around. + +Hence it can be seen that the relogging operation is fundamental to the correct +working of the XFS journalling subsystem. From the above description, most +people should be able to see why the XFS metadata operations writes so much to +the log - repeated operations to the same objects write the same changes to +the log over and over again. Worse is the fact that objects tend to get +dirtier as they get relogged, so each subsequent transaction is writing more +metadata into the log. + +Another feature of the XFS transaction subsystem is that most transactions are +asynchronous. That is, they don't commit to disk until either a log buffer is +filled (a log buffer can hold multiple transactions) or a synchronous operation +forces the log buffers holding the transactions to disk. This means that XFS is +doing aggregation of transactions in memory - batching them, if you like - to +minimise the impact of the log IO on transaction throughput. + +The limitation on asynchronous transaction throughput is the number and size of +log buffers made available by the log manager. By default there are 8 log +buffers available and the size of each is 32kB - the size can be increased up +to 256kB by use of a mount option. + +Effectively, this gives us the maximum bound of outstanding metadata changes +that can be made to the filesystem at any point in time - if all the log +buffers are full and under IO, then no more transactions can be committed until +the current batch completes. 
It is now common for a single current CPU core to +be to able to issue enough transactions to keep the log buffers full and under +IO permanently. Hence the XFS journalling subsystem can be considered to be IO +bound. + +Delayed Logging: Concepts +========================= + +The key thing to note about the asynchronous logging combined with the +relogging technique XFS uses is that we can be relogging changed objects +multiple times before they are committed to disk in the log buffers. If we +return to the previous relogging example, it is entirely possible that +transactions A through D are committed to disk in the same log buffer. + +That is, a single log buffer may contain multiple copies of the same object, +but only one of those copies needs to be there - the last one "D", as it +contains all the changes from the previous changes. In other words, we have one +necessary copy in the log buffer, and three stale copies that are simply +wasting space. When we are doing repeated operations on the same set of +objects, these "stale objects" can be over 90% of the space used in the log +buffers. It is clear that reducing the number of stale objects written to the +log would greatly reduce the amount of metadata we write to the log, and this +is the fundamental goal of delayed logging. + +From a conceptual point of view, XFS is already doing relogging in memory (where +memory == log buffer), only it is doing it extremely inefficiently. It is using +logical to physical formatting to do the relogging because there is no +infrastructure to keep track of logical changes in memory prior to physically +formatting the changes in a transaction to the log buffer. Hence we cannot avoid +accumulating stale objects in the log buffers. + +Delayed logging is the name we've given to keeping and tracking transactional +changes to objects in memory outside the log buffer infrastructure. 
Because of +the relogging concept fundamental to the XFS journalling subsystem, this is +actually relatively easy to do - all the changes to logged items are already +tracked in the current infrastructure. The big problem is how to accumulate +them and get them to the log in a consistent, recoverable manner. +Describing the problems and how they have been solved is the focus of this +document. + +One of the key changes that delayed logging makes to the operation of the +journalling subsystem is that it disassociates the amount of outstanding +metadata changes from the size and number of log buffers available. In other +words, instead of there only being a maximum of 2MB of transaction changes not +written to the log at any point in time, there may be a much greater amount +being accumulated in memory. Hence the potential for loss of metadata on a +crash is much greater than for the existing logging mechanism. + +It should be noted that this does not change the guarantee that log recovery +will result in a consistent filesystem. What it does mean is that as far as the +recovered filesystem is concerned, there may be many thousands of transactions +that simply did not occur as a result of the crash. This makes it even more +important that applications that care about their data use fsync() where they +need to ensure application level data integrity is maintained. + +It should be noted that delayed logging is not an innovative new concept that +warrants rigorous proofs to determine whether it is correct or not. The method +of accumulating changes in memory for some period before writing them to the +log is used effectively in many filesystems including ext3 and ext4. Hence +no time is spent in this document trying to convince the reader that the +concept is sound. Instead it is simply considered a "solved problem" and as +such implementing it in XFS is purely an exercise in software engineering. 
+ +The fundamental requirements for delayed logging in XFS are simple: + + 1. Reduce the amount of metadata written to the log by at least + an order of magnitude. + 2. Supply sufficient statistics to validate Requirement #1. + 3. Supply sufficient new tracing infrastructure to be able to debug + problems with the new code. + 4. No on-disk format change (metadata or log format). + 5. Enable and disable with a mount option. + 6. No performance regressions for synchronous transaction workloads. + +Delayed Logging: Design +======================= + +Storing Changes +--------------- + +The problem with accumulating changes at a logical level (i.e. just using the +existing log item dirty region tracking) is that when it comes to writing the +changes to the log buffers, we need to ensure that the object we are formatting +is not changing while we do this. This requires locking the object to prevent +concurrent modification. Hence flushing the logical changes to the log would +require us to lock every object, format them, and then unlock them again. + +This introduces lots of scope for deadlocks with transactions that are already +running. For example, a transaction has object A locked and modified, but needs +the delayed logging tracking lock to commit the transaction. However, the +flushing thread has the delayed logging tracking lock already held, and is +trying to get the lock on object A to flush it to the log buffer. This appears +to be an unsolvable deadlock condition, and it was solving this problem that +was the barrier to implementing delayed logging for so long. + +The solution is relatively simple - it just took a long time to recognise it. +Put simply, the current logging code formats the changes to each item into an +vector array that points to the changed regions in the item. The log write code +simply copies the memory these vectors point to into the log buffer during +transaction commit while the item is locked in the transaction. 
Instead of +using the log buffer as the destination of the formatting code, we can use an +allocated memory buffer big enough to fit the formatted vector. + +If we then copy the vector into the memory buffer and rewrite the vector to +point to the memory buffer rather than the object itself, we now have a copy of +the changes in a format that is compatible with the log buffer writing code. +that does not require us to lock the item to access. This formatting and +rewriting can all be done while the object is locked during transaction commit, +resulting in a vector that is transactionally consistent and can be accessed +without needing to lock the owning item. + +Hence we avoid the need to lock items when we need to flush outstanding +asynchronous transactions to the log. The differences between the existing +formatting method and the delayed logging formatting can be seen in the +diagram below. + +Current format log vector:: + + Object +---------------------------------------------+ + Vector 1 +----+ + Vector 2 +----+ + Vector 3 +----------+ + +After formatting:: + + Log Buffer +-V1-+-V2-+----V3----+ + +Delayed logging vector:: + + Object +---------------------------------------------+ + Vector 1 +----+ + Vector 2 +----+ + Vector 3 +----------+ + +After formatting:: + + Memory Buffer +-V1-+-V2-+----V3----+ + Vector 1 +----+ + Vector 2 +----+ + Vector 3 +----------+ + +The memory buffer and associated vector need to be passed as a single object, +but still need to be associated with the parent object so if the object is +relogged we can replace the current memory buffer with a new memory buffer that +contains the latest changes. + +The reason for keeping the vector around after we've formatted the memory +buffer is to support splitting vectors across log buffer boundaries correctly. +If we don't keep the vector around, we do not know where the region boundaries +are in the item, so we'd need a new encapsulation method for regions in the log +buffer writing (i.e. 
double encapsulation). This would be an on-disk format +change and as such is not desirable. It also means we'd have to write the log +region headers in the formatting stage, which is problematic as there is per +region state that needs to be placed into the headers during the log write. + +Hence we need to keep the vector, but by attaching the memory buffer to it and +rewriting the vector addresses to point at the memory buffer we end up with a +self-describing object that can be passed to the log buffer write code to be +handled in exactly the same manner as the existing log vectors are handled. +Hence we avoid needing a new on-disk format to handle items that have been +relogged in memory. + + +Tracking Changes +---------------- + +Now that we can record transactional changes in memory in a form that allows +them to be used without limitations, we need to be able to track and accumulate +them so that they can be written to the log at some later point in time. The +log item is the natural place to store this vector and buffer, and also makes sense +to be the object that is used to track committed objects as it will always +exist once the object has been included in a transaction. + +The log item is already used to track the log items that have been written to +the log but not yet written to disk. Such log items are considered "active" +and as such are stored in the Active Item List (AIL) which is a LSN-ordered +double linked list. Items are inserted into this list during log buffer IO +completion, after which they are unpinned and can be written to disk. An object +that is in the AIL can be relogged, which causes the object to be pinned again +and then moved forward in the AIL when the log buffer IO completes for that +transaction. + +Essentially, this shows that an item that is in the AIL can still be modified +and relogged, so any tracking must be separate to the AIL infrastructure. 
As +such, we cannot reuse the AIL list pointers for tracking committed items, nor +can we store state in any field that is protected by the AIL lock. Hence the +committed item tracking needs it's own locks, lists and state fields in the log +item. + +Similar to the AIL, tracking of committed items is done through a new list +called the Committed Item List (CIL). The list tracks log items that have been +committed and have formatted memory buffers attached to them. It tracks objects +in transaction commit order, so when an object is relogged it is removed from +it's place in the list and re-inserted at the tail. This is entirely arbitrary +and done to make it easy for debugging - the last items in the list are the +ones that are most recently modified. Ordering of the CIL is not necessary for +transactional integrity (as discussed in the next section) so the ordering is +done for convenience/sanity of the developers. + + +Delayed Logging: Checkpoints +---------------------------- + +When we have a log synchronisation event, commonly known as a "log force", +all the items in the CIL must be written into the log via the log buffers. +We need to write these items in the order that they exist in the CIL, and they +need to be written as an atomic transaction. The need for all the objects to be +written as an atomic transaction comes from the requirements of relogging and +log replay - all the changes in all the objects in a given transaction must +either be completely replayed during log recovery, or not replayed at all. If +a transaction is not replayed because it is not complete in the log, then +no later transactions should be replayed, either. + +To fulfill this requirement, we need to write the entire CIL in a single log +transaction. Fortunately, the XFS log code has no fixed limit on the size of a +transaction, nor does the log replay code. The only fundamental limit is that +the transaction cannot be larger than just under half the size of the log. 
The +reason for this limit is that to find the head and tail of the log, there must +be at least one complete transaction in the log at any given time. If a +transaction is larger than half the log, then there is the possibility that a +crash during the write of a such a transaction could partially overwrite the +only complete previous transaction in the log. This will result in a recovery +failure and an inconsistent filesystem and hence we must enforce the maximum +size of a checkpoint to be slightly less than a half the log. + +Apart from this size requirement, a checkpoint transaction looks no different +to any other transaction - it contains a transaction header, a series of +formatted log items and a commit record at the tail. From a recovery +perspective, the checkpoint transaction is also no different - just a lot +bigger with a lot more items in it. The worst case effect of this is that we +might need to tune the recovery transaction object hash size. + +Because the checkpoint is just another transaction and all the changes to log +items are stored as log vectors, we can use the existing log buffer writing +code to write the changes into the log. To do this efficiently, we need to +minimise the time we hold the CIL locked while writing the checkpoint +transaction. The current log write code enables us to do this easily with the +way it separates the writing of the transaction contents (the log vectors) from +the transaction commit record, but tracking this requires us to have a +per-checkpoint context that travels through the log write process through to +checkpoint completion. + +Hence a checkpoint has a context that tracks the state of the current +checkpoint from initiation to checkpoint completion. A new context is initiated +at the same time a checkpoint transaction is started. That is, when we remove +all the current items from the CIL during a checkpoint operation, we move all +those changes into the current checkpoint context. 
We then initialise a new +context and attach that to the CIL for aggregation of new transactions. + +This allows us to unlock the CIL immediately after transfer of all the +committed items and effectively allow new transactions to be issued while we +are formatting the checkpoint into the log. It also allows concurrent +checkpoints to be written into the log buffers in the case of log force heavy +workloads, just like the existing transaction commit code does. This, however, +requires that we strictly order the commit records in the log so that +checkpoint sequence order is maintained during log replay. + +To ensure that we can be writing an item into a checkpoint transaction at +the same time another transaction modifies the item and inserts the log item +into the new CIL, then checkpoint transaction commit code cannot use log items +to store the list of log vectors that need to be written into the transaction. +Hence log vectors need to be able to be chained together to allow them to be +detached from the log items. That is, when the CIL is flushed the memory +buffer and log vector attached to each log item needs to be attached to the +checkpoint context so that the log item can be released. In diagrammatic form, +the CIL would look like this before the flush:: + + CIL Head + | + V + Log Item <-> log vector 1 -> memory buffer + | -> vector array + V + Log Item <-> log vector 2 -> memory buffer + | -> vector array + V + ...... + | + V + Log Item <-> log vector N-1 -> memory buffer + | -> vector array + V + Log Item <-> log vector N -> memory buffer + -> vector array + +And after the flush the CIL head is empty, and the checkpoint context log +vector list would look like:: + + Checkpoint Context + | + V + log vector 1 -> memory buffer + | -> vector array + | -> Log Item + V + log vector 2 -> memory buffer + | -> vector array + | -> Log Item + V + ...... 
+ | + V + log vector N-1 -> memory buffer + | -> vector array + | -> Log Item + V + log vector N -> memory buffer + -> vector array + -> Log Item + +Once this transfer is done, the CIL can be unlocked and new transactions can +start, while the checkpoint flush code works over the log vector chain to +commit the checkpoint. + +Once the checkpoint is written into the log buffers, the checkpoint context is +attached to the log buffer that the commit record was written to along with a +completion callback. Log IO completion will call that callback, which can then +run transaction committed processing for the log items (i.e. insert into AIL +and unpin) in the log vector chain and then free the log vector chain and +checkpoint context. + +Discussion Point: I am uncertain as to whether the log item is the most +efficient way to track vectors, even though it seems like the natural way to do +it. The fact that we walk the log items (in the CIL) just to chain the log +vectors and break the link between the log item and the log vector means that +we take a cache line hit for the log item list modification, then another for +the log vector chaining. If we track by the log vectors, then we only need to +break the link between the log item and the log vector, which means we should +dirty only the log item cachelines. Normally I wouldn't be concerned about one +vs two dirty cachelines except for the fact I've seen upwards of 80,000 log +vectors in one checkpoint transaction. I'd guess this is a "measure and +compare" situation that can be done after a working and reviewed implementation +is in the dev tree.... + +Delayed Logging: Checkpoint Sequencing +-------------------------------------- + +One of the key aspects of the XFS transaction subsystem is that it tags +committed transactions with the log sequence number of the transaction commit. 
+This allows transactions to be issued asynchronously even though there may be +future operations that cannot be completed until that transaction is fully +committed to the log. In the rare case that a dependent operation occurs (e.g. +re-using a freed metadata extent for a data extent), a special, optimised log +force can be issued to force the dependent transaction to disk immediately. + +To do this, transactions need to record the LSN of the commit record of the +transaction. This LSN comes directly from the log buffer the transaction is +written into. While this works just fine for the existing transaction +mechanism, it does not work for delayed logging because transactions are not +written directly into the log buffers. Hence some other method of sequencing +transactions is required. + +As discussed in the checkpoint section, delayed logging uses per-checkpoint +contexts, and as such it is simple to assign a sequence number to each +checkpoint. Because the switching of checkpoint contexts must be done +atomically, it is simple to ensure that each new context has a monotonically +increasing sequence number assigned to it without the need for an external +atomic counter - we can just take the current context sequence number and add +one to it for the new context. + +Then, instead of assigning a log buffer LSN to the transaction commit LSN +during the commit, we can assign the current checkpoint sequence. This allows +operations that track transactions that have not yet completed know what +checkpoint sequence needs to be committed before they can continue. As a +result, the code that forces the log to a specific LSN now needs to ensure that +the log forces to a specific checkpoint. + +To ensure that we can do this, we need to track all the checkpoint contexts +that are currently committing to the log. When we flush a checkpoint, the +context gets added to a "committing" list which can be searched. 
When a +checkpoint commit completes, it is removed from the committing list. Because +the checkpoint context records the LSN of the commit record for the checkpoint, +we can also wait on the log buffer that contains the commit record, thereby +using the existing log force mechanisms to execute synchronous forces. + +It should be noted that the synchronous forces may need to be extended with +mitigation algorithms similar to the current log buffer code to allow +aggregation of multiple synchronous transactions if there are already +synchronous transactions being flushed. Investigation of the performance of the +current design is needed before making any decisions here. + +The main concern with log forces is to ensure that all the previous checkpoints +are also committed to disk before the one we need to wait for. Therefore we +need to check that all the prior contexts in the committing list are also +complete before waiting on the one we need to complete. We do this +synchronisation in the log force code so that we don't need to wait anywhere +else for such serialisation - it only matters when we do a log force. + +The only remaining complexity is that a log force now also has to handle the +case where the forcing sequence number is the same as the current context. That +is, we need to flush the CIL and potentially wait for it to complete. This is a +simple addition to the existing log forcing code to check the sequence numbers +and push if required. Indeed, placing the current sequence checkpoint flush in +the log force code enables the current mechanism for issuing synchronous +transactions to remain untouched (i.e. commit an asynchronous transaction, then +force the log at the LSN of that transaction) and so the higher level code +behaves the same regardless of whether delayed logging is being used or not. 
+ +Delayed Logging: Checkpoint Log Space Accounting +------------------------------------------------ + +The big issue for a checkpoint transaction is the log space reservation for the +transaction. We don't know how big a checkpoint transaction is going to be +ahead of time, nor how many log buffers it will take to write out, nor the +number of split log vector regions that are going to be used. We can track the +amount of log space required as we add items to the commit item list, but we +still need to reserve the space in the log for the checkpoint. + +A typical transaction reserves enough space in the log for the worst case space +usage of the transaction. The reservation accounts for log record headers, +transaction and region headers, headers for split regions, buffer tail padding, +etc. as well as the actual space for all the changed metadata in the +transaction. While some of this is fixed overhead, much of it is dependent on +the size of the transaction and the number of regions being logged (the number +of log vectors in the transaction). + +An example of the differences would be logging directory changes versus logging +inode changes. If you modify lots of inode cores (e.g. ``chmod -R g+w *``), then +there are lots of transactions that only contain an inode core and an inode log +format structure. That is, two vectors totaling roughly 150 bytes. If we modify +10,000 inodes, we have about 1.5MB of metadata to write in 20,000 vectors. Each +vector is 12 bytes, so the total to be logged is approximately 1.75MB. In +comparison, if we are logging full directory buffers, they are typically 4KB +each, so in 1.5MB of directory buffers we'd have roughly 400 buffers and a +buffer format structure for each buffer - roughly 800 vectors or 1.51MB total +space. From this, it should be obvious that a static log space reservation is +not particularly flexible and is difficult to select the "optimal value" for +all workloads. 
+ +Further, if we are going to use a static reservation, which bit of the entire +reservation does it cover? We account for space used by the transaction +reservation by tracking the space currently used by the object in the CIL and +then calculating the increase or decrease in space used as the object is +relogged. This allows for a checkpoint reservation to only have to account for +log buffer metadata used such as log header records. + +However, even using a static reservation for just the log metadata is +problematic. Typically log record headers use at least 16KB of log space per +1MB of log space consumed (512 bytes per 32k) and the reservation needs to be +large enough to handle arbitrary sized checkpoint transactions. This +reservation needs to be made before the checkpoint is started, and we need to +be able to reserve the space without sleeping. For an 8MB checkpoint, we need a +reservation of around 150KB, which is a non-trivial amount of space. + +A static reservation needs to manipulate the log grant counters - we can take a +permanent reservation on the space, but we still need to make sure we refresh +the write reservation (the actual space available to the transaction) after +every checkpoint transaction completion. Unfortunately, if this space is not +available when required, then the regrant code will sleep waiting for it. + +The problem with this is that it can lead to deadlocks as we may need to commit +checkpoints to be able to free up log space (refer back to the description of +rolling transactions for an example of this). Hence we *must* always have +space available in the log if we are to use static reservations, and that is +very difficult and complex to arrange. It is possible to do, but there is a +simpler way. + +The simpler way of doing this is tracking the entire log space used by the +items in the CIL and using this to dynamically calculate the amount of log +space required by the log metadata. 
If this log metadata space changes as a +result of a transaction commit inserting a new memory buffer into the CIL, then +the difference in space required is removed from the transaction that causes +the change. Transactions at this level will *always* have enough space +available in their reservation for this as they have already reserved the +maximal amount of log metadata space they require, and such a delta reservation +will always be less than or equal to the maximal amount in the reservation. + +Hence we can grow the checkpoint transaction reservation dynamically as items +are added to the CIL and avoid the need for reserving and regranting log space +up front. This avoids deadlocks and removes a blocking point from the +checkpoint flush code. + +As mentioned earlier, transactions can't grow to more than half the size of the +log. Hence as part of the reservation growing, we need to also check the size +of the reservation against the maximum allowed transaction size. If we reach +the maximum threshold, we need to push the CIL to the log. This is effectively +a "background flush" and is done on demand. This is identical to +a CIL push triggered by a log force, only that there is no waiting for the +checkpoint commit to complete. This background push is checked and executed by +transaction commit code. + +If the transaction subsystem goes idle while we still have items in the CIL, +they will be flushed by the periodic log force issued by the xfssyncd. This log +force will push the CIL to disk, and if the transaction subsystem stays idle, +allow the idle log to be covered (effectively marked clean) in exactly the same +manner that is done for the existing logging method. A discussion point is +whether this log force needs to be done more frequently than the current rate +which is once every 30s. + + +Delayed Logging: Log Item Pinning +--------------------------------- + +Currently log items are pinned during transaction commit while the items are +still locked. 
This happens just after the items are formatted, though it could +be done any time before the items are unlocked. The result of this mechanism is +that items get pinned once for every transaction that is committed to the log +buffers. Hence items that are relogged in the log buffers will have a pin count +for every outstanding transaction they were dirtied in. When each of these +transactions is completed, they will unpin the item once. As a result, the item +only becomes unpinned when all the transactions complete and there are no +pending transactions. Thus the pinning and unpinning of a log item is symmetric +as there is a 1:1 relationship with transaction commit and log item completion. + +For delayed logging, however, we have an asymmetric transaction commit to +completion relationship. Every time an object is relogged in the CIL it goes +through the commit process without a corresponding completion being registered. +That is, we now have a many-to-one relationship between transaction commit and +log item completion. The result of this is that pinning and unpinning of the +log items becomes unbalanced if we retain the "pin on transaction commit, unpin +on transaction completion" model. + +To keep pin/unpin symmetry, the algorithm needs to change to a "pin on +insertion into the CIL, unpin on checkpoint completion". In other words, the +pinning and unpinning becomes symmetric around a checkpoint context. We have to +pin the object the first time it is inserted into the CIL - if it is already in +the CIL during a transaction commit, then we do not pin it again. Because there +can be multiple outstanding checkpoint contexts, we can still see elevated pin +counts, but as each checkpoint completes the pin count will retain the correct +value according to its context. + +Just to make matters slightly more complex, this checkpoint level context +for the pin count means that the pinning of an item must take place under the +CIL commit/flush lock. 
If we pin the object outside this lock, we cannot +guarantee which context the pin count is associated with. This is because of +the fact that pinning the item is dependent on whether the item is present in the +current CIL or not. If we don't lock the CIL first before we check and pin the +object, we have a race with CIL being flushed between the check and the pin +(or not pinning, as the case may be). Hence we must hold the CIL flush/commit +lock to guarantee that we pin the items correctly. + +Delayed Logging: Concurrent Scalability +--------------------------------------- + +A fundamental requirement for the CIL is that accesses through transaction +commits must scale to many concurrent commits. The current transaction commit +code does not break down even when there are transactions coming from 2048 +processors at once. The current transaction code does not go any faster than if +there was only one CPU using it, but it does not slow down either. + +As a result, the delayed logging transaction commit code needs to be designed +for concurrency from the ground up. It is obvious that there are serialisation +points in the design - the three important ones are: + + 1. Locking out new transaction commits while flushing the CIL + 2. Adding items to the CIL and updating item space accounting + 3. Checkpoint commit ordering + +Looking at the transaction commit and CIL flushing interactions, it is clear +that we have a many-to-one interaction here. That is, the only restriction on +the number of concurrent transactions that can be trying to commit at once is +the amount of space available in the log for their reservations. The practical +limit here is in the order of several hundred concurrent transactions for a +128MB log, which means that it is generally one per CPU in a machine. 
+ +The amount of time a transaction commit needs to hold out a flush is a +relatively long period of time - the pinning of log items needs to be done +while we are holding out a CIL flush, so at the moment that means it is held +across the formatting of the objects into memory buffers (i.e. while memcpy()s +are in progress). Ultimately a two pass algorithm where the formatting is done +separately to the pinning of objects could be used to reduce the hold time of +the transaction commit side. + +Because of the number of potential transaction commit side holders, the lock +really needs to be a sleeping lock - if the CIL flush takes the lock, we do not +want every other CPU in the machine spinning on the CIL lock. Given that +flushing the CIL could involve walking a list of tens of thousands of log +items, it will get held for a significant time and so spin contention is a +significant concern. Preventing lots of CPUs spinning doing nothing is the +main reason for choosing a sleeping lock even though nothing in either the +transaction commit or CIL flush side sleeps with the lock held. + +It should also be noted that CIL flushing is also a relatively rare operation +compared to transaction commit for asynchronous transaction workloads - only +time will tell if using a read-write semaphore for exclusion will limit +transaction commit concurrency due to cache line bouncing of the lock on the +read side. + +The second serialisation point is on the transaction commit side where items +are inserted into the CIL. Because transactions can enter this code +concurrently, the CIL needs to be protected separately from the above +commit/flush exclusion. It also needs to be an exclusive lock but it is only +held for a very short time and so a spin lock is appropriate here. It is +possible that this lock will become a contention point, but given the short +hold time once per transaction I think that contention is unlikely. 
+ +The final serialisation point is the checkpoint commit record ordering code +that is run as part of the checkpoint commit and log force sequencing. The code +path that triggers a CIL flush (i.e. whatever triggers the log force) will enter +an ordering loop after writing all the log vectors into the log buffers but +before writing the commit record. This loop walks the list of committing +checkpoints and needs to block waiting for checkpoints to complete their commit +record write. As a result it needs a lock and a wait variable. Log force +sequencing also requires the same lock, list walk, and blocking mechanism to +ensure completion of checkpoints. + +These two sequencing operations can use the mechanism even though the +events they are waiting for are different. The checkpoint commit record +sequencing needs to wait until checkpoint contexts contain a commit LSN +(obtained through completion of a commit record write) while log force +sequencing needs to wait until previous checkpoint contexts are removed from +the committing list (i.e. they've completed). A simple wait variable and +broadcast wakeups (thundering herds) has been used to implement these two +serialisation queues. They use the same lock as the CIL, too. If we see too +much contention on the CIL lock, or too many context switches as a result of +the broadcast wakeups these operations can be put under a new spinlock and +given separate wait lists to reduce lock contention and the number of processes +woken by the wrong event. + + +Lifecycle Changes +----------------- + +The existing log item life cycle is as follows:: + + 1. Transaction allocate + 2. Transaction reserve + 3. Lock item + 4. Join item to transaction + If not already attached, + Allocate log item + Attach log item to owner item + Attach log item to transaction + 5. Modify item + Record modifications in log item + 6. 
Transaction commit + Pin item in memory + Format item into log buffer + Write commit LSN into transaction + Unlock item + Attach transaction to log buffer + + + + + 7. Transaction completion + Mark log item committed + Insert log item into AIL + Write commit LSN into log item + Unpin log item + 8. AIL traversal + Lock item + Mark log item clean + Flush item to disk + + + + 9. Log item removed from AIL + Moves log tail + Item unlocked + +Essentially, steps 1-6 operate independently from step 7, which is also +independent of steps 8-9. An item can be locked in steps 1-6 or steps 8-9 +at the same time step 7 is occurring, but only steps 1-6 or 8-9 can occur +at the same time. If the log item is in the AIL or between steps 6 and 7 +and steps 1-6 are re-entered, then the item is relogged. Only when steps 8-9 +are entered and completed is the object considered clean. + +With delayed logging, there are new steps inserted into the life cycle:: + + 1. Transaction allocate + 2. Transaction reserve + 3. Lock item + 4. Join item to transaction + If not already attached, + Allocate log item + Attach log item to owner item + Attach log item to transaction + 5. Modify item + Record modifications in log item + 6. Transaction commit + Pin item in memory if not pinned in CIL + Format item into log vector + buffer + Attach log vector and buffer to log item + Insert log item into CIL + Write CIL context sequence into transaction + Unlock item + + + + 7. CIL push + lock CIL flush + Chain log vectors and buffers together + Remove items from CIL + unlock CIL flush + write log vectors into log + sequence commit records + attach checkpoint context to log buffer + + + + + 8. Checkpoint completion + Mark log item committed + Insert item into AIL + Write commit LSN into log item + Unpin log item + 9. AIL traversal + Lock item + Mark log item clean + Flush item to disk + + 10. 
Log item removed from AIL + Moves log tail + Item unlocked + +From this, it can be seen that the only life cycle differences between the two +logging methods are in the middle of the life cycle - they still have the same +beginning and end and execution constraints. The only differences are in the +committing of the log items to the log itself and the completion processing. +Hence delayed logging should not introduce any constraints on log item +behaviour, allocation or freeing that don't already exist. + +As a result of this zero-impact "insertion" of delayed logging infrastructure +and the design of the internal structures to avoid on disk format changes, we +can basically switch between delayed logging and the existing mechanism with a +mount option. Fundamentally, there is no reason why the log manager would not +be able to swap methods automatically and transparently depending on load +characteristics, but this should not be necessary if delayed logging works as +designed. diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt deleted file mode 100644 index 9a6dd289b17b..000000000000 --- a/Documentation/filesystems/xfs-delayed-logging-design.txt +++ /dev/null @@ -1,793 +0,0 @@ -XFS Delayed Logging Design --------------------------- - -Introduction to Re-logging in XFS ---------------------------------- - -XFS logging is a combination of logical and physical logging. Some objects, -such as inodes and dquots, are logged in logical format where the details -logged are made up of the changes to in-core structures rather than on-disk -structures. Other objects - typically buffers - have their physical changes -logged. The reason for these differences is to reduce the amount of log space -required for objects that are frequently logged. 
Some parts of inodes are more -frequently logged than others, and inodes are typically more frequently logged -than any other object (except maybe the superblock buffer) so keeping the -amount of metadata logged low is of prime importance. - -The reason that this is such a concern is that XFS allows multiple separate -modifications to a single object to be carried in the log at any given time. -This allows the log to avoid needing to flush each change to disk before -recording a new change to the object. XFS does this via a method called -"re-logging". Conceptually, this is quite simple - all it requires is that any -new change to the object is recorded with a *new copy* of all the existing -changes in the new transaction that is written to the log. - -That is, if we have a sequence of changes A through to F, and the object was -written to disk after change D, we would see in the log the following series -of transactions, their contents and the log sequence number (LSN) of the -transaction: - - Transaction Contents LSN - A A X - B A+B X+n - C A+B+C X+n+m - D A+B+C+D X+n+m+o - - E E Y (> X+n+m+o) - F E+F Y+p - -In other words, each time an object is relogged, the new transaction contains -the aggregation of all the previous changes currently held only in the log. - -This relogging technique also allows objects to be moved forward in the log so -that an object being relogged does not prevent the tail of the log from ever -moving forward. This can be seen in the table above by the changing -(increasing) LSN of each subsequent transaction - the LSN is effectively a -direct encoding of the location in the log of the transaction. - -This relogging is also used to implement long-running, multiple-commit -transactions. These transaction are known as rolling transactions, and require -a special log reservation known as a permanent transaction reservation. 
A -typical example of a rolling transaction is the removal of extents from an -inode which can only be done at a rate of two extents per transaction because -of reservation size limitations. Hence a rolling extent removal transaction -keeps relogging the inode and btree buffers as they get modified in each -removal operation. This keeps them moving forward in the log as the operation -progresses, ensuring that current operation never gets blocked by itself if the -log wraps around. - -Hence it can be seen that the relogging operation is fundamental to the correct -working of the XFS journalling subsystem. From the above description, most -people should be able to see why the XFS metadata operations writes so much to -the log - repeated operations to the same objects write the same changes to -the log over and over again. Worse is the fact that objects tend to get -dirtier as they get relogged, so each subsequent transaction is writing more -metadata into the log. - -Another feature of the XFS transaction subsystem is that most transactions are -asynchronous. That is, they don't commit to disk until either a log buffer is -filled (a log buffer can hold multiple transactions) or a synchronous operation -forces the log buffers holding the transactions to disk. This means that XFS is -doing aggregation of transactions in memory - batching them, if you like - to -minimise the impact of the log IO on transaction throughput. - -The limitation on asynchronous transaction throughput is the number and size of -log buffers made available by the log manager. By default there are 8 log -buffers available and the size of each is 32kB - the size can be increased up -to 256kB by use of a mount option. - -Effectively, this gives us the maximum bound of outstanding metadata changes -that can be made to the filesystem at any point in time - if all the log -buffers are full and under IO, then no more transactions can be committed until -the current batch completes. 
It is now common for a single current CPU core to -be to able to issue enough transactions to keep the log buffers full and under -IO permanently. Hence the XFS journalling subsystem can be considered to be IO -bound. - -Delayed Logging: Concepts -------------------------- - -The key thing to note about the asynchronous logging combined with the -relogging technique XFS uses is that we can be relogging changed objects -multiple times before they are committed to disk in the log buffers. If we -return to the previous relogging example, it is entirely possible that -transactions A through D are committed to disk in the same log buffer. - -That is, a single log buffer may contain multiple copies of the same object, -but only one of those copies needs to be there - the last one "D", as it -contains all the changes from the previous changes. In other words, we have one -necessary copy in the log buffer, and three stale copies that are simply -wasting space. When we are doing repeated operations on the same set of -objects, these "stale objects" can be over 90% of the space used in the log -buffers. It is clear that reducing the number of stale objects written to the -log would greatly reduce the amount of metadata we write to the log, and this -is the fundamental goal of delayed logging. - -From a conceptual point of view, XFS is already doing relogging in memory (where -memory == log buffer), only it is doing it extremely inefficiently. It is using -logical to physical formatting to do the relogging because there is no -infrastructure to keep track of logical changes in memory prior to physically -formatting the changes in a transaction to the log buffer. Hence we cannot avoid -accumulating stale objects in the log buffers. - -Delayed logging is the name we've given to keeping and tracking transactional -changes to objects in memory outside the log buffer infrastructure. 
Because of -the relogging concept fundamental to the XFS journalling subsystem, this is -actually relatively easy to do - all the changes to logged items are already -tracked in the current infrastructure. The big problem is how to accumulate -them and get them to the log in a consistent, recoverable manner. -Describing the problems and how they have been solved is the focus of this -document. - -One of the key changes that delayed logging makes to the operation of the -journalling subsystem is that it disassociates the amount of outstanding -metadata changes from the size and number of log buffers available. In other -words, instead of there only being a maximum of 2MB of transaction changes not -written to the log at any point in time, there may be a much greater amount -being accumulated in memory. Hence the potential for loss of metadata on a -crash is much greater than for the existing logging mechanism. - -It should be noted that this does not change the guarantee that log recovery -will result in a consistent filesystem. What it does mean is that as far as the -recovered filesystem is concerned, there may be many thousands of transactions -that simply did not occur as a result of the crash. This makes it even more -important that applications that care about their data use fsync() where they -need to ensure application level data integrity is maintained. - -It should be noted that delayed logging is not an innovative new concept that -warrants rigorous proofs to determine whether it is correct or not. The method -of accumulating changes in memory for some period before writing them to the -log is used effectively in many filesystems including ext3 and ext4. Hence -no time is spent in this document trying to convince the reader that the -concept is sound. Instead it is simply considered a "solved problem" and as -such implementing it in XFS is purely an exercise in software engineering. 
- -The fundamental requirements for delayed logging in XFS are simple: - - 1. Reduce the amount of metadata written to the log by at least - an order of magnitude. - 2. Supply sufficient statistics to validate Requirement #1. - 3. Supply sufficient new tracing infrastructure to be able to debug - problems with the new code. - 4. No on-disk format change (metadata or log format). - 5. Enable and disable with a mount option. - 6. No performance regressions for synchronous transaction workloads. - -Delayed Logging: Design ------------------------ - -Storing Changes - -The problem with accumulating changes at a logical level (i.e. just using the -existing log item dirty region tracking) is that when it comes to writing the -changes to the log buffers, we need to ensure that the object we are formatting -is not changing while we do this. This requires locking the object to prevent -concurrent modification. Hence flushing the logical changes to the log would -require us to lock every object, format them, and then unlock them again. - -This introduces lots of scope for deadlocks with transactions that are already -running. For example, a transaction has object A locked and modified, but needs -the delayed logging tracking lock to commit the transaction. However, the -flushing thread has the delayed logging tracking lock already held, and is -trying to get the lock on object A to flush it to the log buffer. This appears -to be an unsolvable deadlock condition, and it was solving this problem that -was the barrier to implementing delayed logging for so long. - -The solution is relatively simple - it just took a long time to recognise it. -Put simply, the current logging code formats the changes to each item into an -vector array that points to the changed regions in the item. The log write code -simply copies the memory these vectors point to into the log buffer during -transaction commit while the item is locked in the transaction. 
Instead of -using the log buffer as the destination of the formatting code, we can use an -allocated memory buffer big enough to fit the formatted vector. - -If we then copy the vector into the memory buffer and rewrite the vector to -point to the memory buffer rather than the object itself, we now have a copy of -the changes in a format that is compatible with the log buffer writing code. -that does not require us to lock the item to access. This formatting and -rewriting can all be done while the object is locked during transaction commit, -resulting in a vector that is transactionally consistent and can be accessed -without needing to lock the owning item. - -Hence we avoid the need to lock items when we need to flush outstanding -asynchronous transactions to the log. The differences between the existing -formatting method and the delayed logging formatting can be seen in the -diagram below. - -Current format log vector: - -Object +---------------------------------------------+ -Vector 1 +----+ -Vector 2 +----+ -Vector 3 +----------+ - -After formatting: - -Log Buffer +-V1-+-V2-+----V3----+ - -Delayed logging vector: - -Object +---------------------------------------------+ -Vector 1 +----+ -Vector 2 +----+ -Vector 3 +----------+ - -After formatting: - -Memory Buffer +-V1-+-V2-+----V3----+ -Vector 1 +----+ -Vector 2 +----+ -Vector 3 +----------+ - -The memory buffer and associated vector need to be passed as a single object, -but still need to be associated with the parent object so if the object is -relogged we can replace the current memory buffer with a new memory buffer that -contains the latest changes. - -The reason for keeping the vector around after we've formatted the memory -buffer is to support splitting vectors across log buffer boundaries correctly. -If we don't keep the vector around, we do not know where the region boundaries -are in the item, so we'd need a new encapsulation method for regions in the log -buffer writing (i.e. 
double encapsulation). This would be an on-disk format -change and as such is not desirable. It also means we'd have to write the log -region headers in the formatting stage, which is problematic as there is per -region state that needs to be placed into the headers during the log write. - -Hence we need to keep the vector, but by attaching the memory buffer to it and -rewriting the vector addresses to point at the memory buffer we end up with a -self-describing object that can be passed to the log buffer write code to be -handled in exactly the same manner as the existing log vectors are handled. -Hence we avoid needing a new on-disk format to handle items that have been -relogged in memory. - - -Tracking Changes - -Now that we can record transactional changes in memory in a form that allows -them to be used without limitations, we need to be able to track and accumulate -them so that they can be written to the log at some later point in time. The -log item is the natural place to store this vector and buffer, and also makes sense -to be the object that is used to track committed objects as it will always -exist once the object has been included in a transaction. - -The log item is already used to track the log items that have been written to -the log but not yet written to disk. Such log items are considered "active" -and as such are stored in the Active Item List (AIL) which is a LSN-ordered -double linked list. Items are inserted into this list during log buffer IO -completion, after which they are unpinned and can be written to disk. An object -that is in the AIL can be relogged, which causes the object to be pinned again -and then moved forward in the AIL when the log buffer IO completes for that -transaction. - -Essentially, this shows that an item that is in the AIL can still be modified -and relogged, so any tracking must be separate to the AIL infrastructure. 
As
-such, we cannot reuse the AIL list pointers for tracking committed items, nor
-can we store state in any field that is protected by the AIL lock. Hence the
-committed item tracking needs its own locks, lists and state fields in the log
-item.
-
-Similar to the AIL, tracking of committed items is done through a new list
-called the Committed Item List (CIL). The list tracks log items that have been
-committed and have formatted memory buffers attached to them. It tracks objects
-in transaction commit order, so when an object is relogged it is removed from
-its place in the list and re-inserted at the tail. This is entirely arbitrary
-and done to make it easy for debugging - the last items in the list are the
-ones that are most recently modified. Ordering of the CIL is not necessary for
-transactional integrity (as discussed in the next section) so the ordering is
-done for convenience/sanity of the developers.
-
-
-Delayed Logging: Checkpoints
-
-When we have a log synchronisation event, commonly known as a "log force",
-all the items in the CIL must be written into the log via the log buffers.
-We need to write these items in the order that they exist in the CIL, and they
-need to be written as an atomic transaction. The need for all the objects to be
-written as an atomic transaction comes from the requirements of relogging and
-log replay - all the changes in all the objects in a given transaction must
-either be completely replayed during log recovery, or not replayed at all. If
-a transaction is not replayed because it is not complete in the log, then
-no later transactions should be replayed, either.
-
-To fulfill this requirement, we need to write the entire CIL in a single log
-transaction. Fortunately, the XFS log code has no fixed limit on the size of a
-transaction, nor does the log replay code. The only fundamental limit is that
-the transaction cannot be larger than just under half the size of the log. 
The -reason for this limit is that to find the head and tail of the log, there must -be at least one complete transaction in the log at any given time. If a -transaction is larger than half the log, then there is the possibility that a -crash during the write of a such a transaction could partially overwrite the -only complete previous transaction in the log. This will result in a recovery -failure and an inconsistent filesystem and hence we must enforce the maximum -size of a checkpoint to be slightly less than a half the log. - -Apart from this size requirement, a checkpoint transaction looks no different -to any other transaction - it contains a transaction header, a series of -formatted log items and a commit record at the tail. From a recovery -perspective, the checkpoint transaction is also no different - just a lot -bigger with a lot more items in it. The worst case effect of this is that we -might need to tune the recovery transaction object hash size. - -Because the checkpoint is just another transaction and all the changes to log -items are stored as log vectors, we can use the existing log buffer writing -code to write the changes into the log. To do this efficiently, we need to -minimise the time we hold the CIL locked while writing the checkpoint -transaction. The current log write code enables us to do this easily with the -way it separates the writing of the transaction contents (the log vectors) from -the transaction commit record, but tracking this requires us to have a -per-checkpoint context that travels through the log write process through to -checkpoint completion. - -Hence a checkpoint has a context that tracks the state of the current -checkpoint from initiation to checkpoint completion. A new context is initiated -at the same time a checkpoint transaction is started. That is, when we remove -all the current items from the CIL during a checkpoint operation, we move all -those changes into the current checkpoint context. 
We then initialise a new -context and attach that to the CIL for aggregation of new transactions. - -This allows us to unlock the CIL immediately after transfer of all the -committed items and effectively allow new transactions to be issued while we -are formatting the checkpoint into the log. It also allows concurrent -checkpoints to be written into the log buffers in the case of log force heavy -workloads, just like the existing transaction commit code does. This, however, -requires that we strictly order the commit records in the log so that -checkpoint sequence order is maintained during log replay. - -To ensure that we can be writing an item into a checkpoint transaction at -the same time another transaction modifies the item and inserts the log item -into the new CIL, then checkpoint transaction commit code cannot use log items -to store the list of log vectors that need to be written into the transaction. -Hence log vectors need to be able to be chained together to allow them to be -detached from the log items. That is, when the CIL is flushed the memory -buffer and log vector attached to each log item needs to be attached to the -checkpoint context so that the log item can be released. In diagrammatic form, -the CIL would look like this before the flush: - - CIL Head - | - V - Log Item <-> log vector 1 -> memory buffer - | -> vector array - V - Log Item <-> log vector 2 -> memory buffer - | -> vector array - V - ...... - | - V - Log Item <-> log vector N-1 -> memory buffer - | -> vector array - V - Log Item <-> log vector N -> memory buffer - -> vector array - -And after the flush the CIL head is empty, and the checkpoint context log -vector list would look like: - - Checkpoint Context - | - V - log vector 1 -> memory buffer - | -> vector array - | -> Log Item - V - log vector 2 -> memory buffer - | -> vector array - | -> Log Item - V - ...... 
- | - V - log vector N-1 -> memory buffer - | -> vector array - | -> Log Item - V - log vector N -> memory buffer - -> vector array - -> Log Item - -Once this transfer is done, the CIL can be unlocked and new transactions can -start, while the checkpoint flush code works over the log vector chain to -commit the checkpoint. - -Once the checkpoint is written into the log buffers, the checkpoint context is -attached to the log buffer that the commit record was written to along with a -completion callback. Log IO completion will call that callback, which can then -run transaction committed processing for the log items (i.e. insert into AIL -and unpin) in the log vector chain and then free the log vector chain and -checkpoint context. - -Discussion Point: I am uncertain as to whether the log item is the most -efficient way to track vectors, even though it seems like the natural way to do -it. The fact that we walk the log items (in the CIL) just to chain the log -vectors and break the link between the log item and the log vector means that -we take a cache line hit for the log item list modification, then another for -the log vector chaining. If we track by the log vectors, then we only need to -break the link between the log item and the log vector, which means we should -dirty only the log item cachelines. Normally I wouldn't be concerned about one -vs two dirty cachelines except for the fact I've seen upwards of 80,000 log -vectors in one checkpoint transaction. I'd guess this is a "measure and -compare" situation that can be done after a working and reviewed implementation -is in the dev tree.... - -Delayed Logging: Checkpoint Sequencing - -One of the key aspects of the XFS transaction subsystem is that it tags -committed transactions with the log sequence number of the transaction commit. -This allows transactions to be issued asynchronously even though there may be -future operations that cannot be completed until that transaction is fully -committed to the log. 
In the rare case that a dependent operation occurs (e.g. -re-using a freed metadata extent for a data extent), a special, optimised log -force can be issued to force the dependent transaction to disk immediately. - -To do this, transactions need to record the LSN of the commit record of the -transaction. This LSN comes directly from the log buffer the transaction is -written into. While this works just fine for the existing transaction -mechanism, it does not work for delayed logging because transactions are not -written directly into the log buffers. Hence some other method of sequencing -transactions is required. - -As discussed in the checkpoint section, delayed logging uses per-checkpoint -contexts, and as such it is simple to assign a sequence number to each -checkpoint. Because the switching of checkpoint contexts must be done -atomically, it is simple to ensure that each new context has a monotonically -increasing sequence number assigned to it without the need for an external -atomic counter - we can just take the current context sequence number and add -one to it for the new context. - -Then, instead of assigning a log buffer LSN to the transaction commit LSN -during the commit, we can assign the current checkpoint sequence. This allows -operations that track transactions that have not yet completed know what -checkpoint sequence needs to be committed before they can continue. As a -result, the code that forces the log to a specific LSN now needs to ensure that -the log forces to a specific checkpoint. - -To ensure that we can do this, we need to track all the checkpoint contexts -that are currently committing to the log. When we flush a checkpoint, the -context gets added to a "committing" list which can be searched. When a -checkpoint commit completes, it is removed from the committing list. 
Because -the checkpoint context records the LSN of the commit record for the checkpoint, -we can also wait on the log buffer that contains the commit record, thereby -using the existing log force mechanisms to execute synchronous forces. - -It should be noted that the synchronous forces may need to be extended with -mitigation algorithms similar to the current log buffer code to allow -aggregation of multiple synchronous transactions if there are already -synchronous transactions being flushed. Investigation of the performance of the -current design is needed before making any decisions here. - -The main concern with log forces is to ensure that all the previous checkpoints -are also committed to disk before the one we need to wait for. Therefore we -need to check that all the prior contexts in the committing list are also -complete before waiting on the one we need to complete. We do this -synchronisation in the log force code so that we don't need to wait anywhere -else for such serialisation - it only matters when we do a log force. - -The only remaining complexity is that a log force now also has to handle the -case where the forcing sequence number is the same as the current context. That -is, we need to flush the CIL and potentially wait for it to complete. This is a -simple addition to the existing log forcing code to check the sequence numbers -and push if required. Indeed, placing the current sequence checkpoint flush in -the log force code enables the current mechanism for issuing synchronous -transactions to remain untouched (i.e. commit an asynchronous transaction, then -force the log at the LSN of that transaction) and so the higher level code -behaves the same regardless of whether delayed logging is being used or not. - -Delayed Logging: Checkpoint Log Space Accounting - -The big issue for a checkpoint transaction is the log space reservation for the -transaction. 
We don't know how big a checkpoint transaction is going to be -ahead of time, nor how many log buffers it will take to write out, nor the -number of split log vector regions are going to be used. We can track the -amount of log space required as we add items to the commit item list, but we -still need to reserve the space in the log for the checkpoint. - -A typical transaction reserves enough space in the log for the worst case space -usage of the transaction. The reservation accounts for log record headers, -transaction and region headers, headers for split regions, buffer tail padding, -etc. as well as the actual space for all the changed metadata in the -transaction. While some of this is fixed overhead, much of it is dependent on -the size of the transaction and the number of regions being logged (the number -of log vectors in the transaction). - -An example of the differences would be logging directory changes versus logging -inode changes. If you modify lots of inode cores (e.g. chmod -R g+w *), then -there are lots of transactions that only contain an inode core and an inode log -format structure. That is, two vectors totaling roughly 150 bytes. If we modify -10,000 inodes, we have about 1.5MB of metadata to write in 20,000 vectors. Each -vector is 12 bytes, so the total to be logged is approximately 1.75MB. In -comparison, if we are logging full directory buffers, they are typically 4KB -each, so we in 1.5MB of directory buffers we'd have roughly 400 buffers and a -buffer format structure for each buffer - roughly 800 vectors or 1.51MB total -space. From this, it should be obvious that a static log space reservation is -not particularly flexible and is difficult to select the "optimal value" for -all workloads. - -Further, if we are going to use a static reservation, which bit of the entire -reservation does it cover? 
We account for space used by the transaction -reservation by tracking the space currently used by the object in the CIL and -then calculating the increase or decrease in space used as the object is -relogged. This allows for a checkpoint reservation to only have to account for -log buffer metadata used such as log header records. - -However, even using a static reservation for just the log metadata is -problematic. Typically log record headers use at least 16KB of log space per -1MB of log space consumed (512 bytes per 32k) and the reservation needs to be -large enough to handle arbitrary sized checkpoint transactions. This -reservation needs to be made before the checkpoint is started, and we need to -be able to reserve the space without sleeping. For a 8MB checkpoint, we need a -reservation of around 150KB, which is a non-trivial amount of space. - -A static reservation needs to manipulate the log grant counters - we can take a -permanent reservation on the space, but we still need to make sure we refresh -the write reservation (the actual space available to the transaction) after -every checkpoint transaction completion. Unfortunately, if this space is not -available when required, then the regrant code will sleep waiting for it. - -The problem with this is that it can lead to deadlocks as we may need to commit -checkpoints to be able to free up log space (refer back to the description of -rolling transactions for an example of this). Hence we *must* always have -space available in the log if we are to use static reservations, and that is -very difficult and complex to arrange. It is possible to do, but there is a -simpler way. - -The simpler way of doing this is tracking the entire log space used by the -items in the CIL and using this to dynamically calculate the amount of log -space required by the log metadata. 
If this log metadata space changes as a
-result of a transaction commit inserting a new memory buffer into the CIL, then
-the difference in space required is removed from the transaction that causes
-the change. Transactions at this level will *always* have enough space
-available in their reservation for this as they have already reserved the
-maximal amount of log metadata space they require, and such a delta reservation
-will always be less than or equal to the maximal amount in the reservation.
-
-Hence we can grow the checkpoint transaction reservation dynamically as items
-are added to the CIL and avoid the need for reserving and regranting log space
-up front. This avoids deadlocks and removes a blocking point from the
-checkpoint flush code.
-
-As mentioned earlier, transactions can't grow to more than half the size of the
-log. Hence as part of the reservation growing, we need to also check the size
-of the reservation against the maximum allowed transaction size. If we reach
-the maximum threshold, we need to push the CIL to the log. This is effectively
-a "background flush" and is done on demand. This is identical to
-a CIL push triggered by a log force, only that there is no waiting for the
-checkpoint commit to complete. This background push is checked and executed by
-transaction commit code.
-
-If the transaction subsystem goes idle while we still have items in the CIL,
-they will be flushed by the periodic log force issued by the xfssyncd. This log
-force will push the CIL to disk, and if the transaction subsystem stays idle,
-allow the idle log to be covered (effectively marked clean) in exactly the same
-manner that is done for the existing logging method. A discussion point is
-whether this log force needs to be done more frequently than the current rate
-which is once every 30s.
-
-
-Delayed Logging: Log Item Pinning
-
-Currently log items are pinned during transaction commit while the items are
-still locked. 
This happens just after the items are formatted, though it could
-be done any time before the items are unlocked. The result of this mechanism is
-that items get pinned once for every transaction that is committed to the log
-buffers. Hence items that are relogged in the log buffers will have a pin count
-for every outstanding transaction they were dirtied in. When each of these
-transactions is completed, they will unpin the item once. As a result, the item
-only becomes unpinned when all the transactions complete and there are no
-pending transactions. Thus the pinning and unpinning of a log item is symmetric
-as there is a 1:1 relationship with transaction commit and log item completion.
-
-For delayed logging, however, we have an asymmetric transaction commit to
-completion relationship. Every time an object is relogged in the CIL it goes
-through the commit process without a corresponding completion being registered.
-That is, we now have a many-to-one relationship between transaction commit and
-log item completion. The result of this is that pinning and unpinning of the
-log items becomes unbalanced if we retain the "pin on transaction commit, unpin
-on transaction completion" model.
-
-To keep pin/unpin symmetry, the algorithm needs to change to a "pin on
-insertion into the CIL, unpin on checkpoint completion". In other words, the
-pinning and unpinning becomes symmetric around a checkpoint context. We have to
-pin the object the first time it is inserted into the CIL - if it is already in
-the CIL during a transaction commit, then we do not pin it again. Because there
-can be multiple outstanding checkpoint contexts, we can still see elevated pin
-counts, but as each checkpoint completes the pin count will retain the correct
-value according to its context.
-
-Just to make matters slightly more complex, this checkpoint level context
-for the pin count means that the pinning of an item must take place under the
-CIL commit/flush lock. 
If we pin the object outside this lock, we cannot -guarantee which context the pin count is associated with. This is because of -the fact pinning the item is dependent on whether the item is present in the -current CIL or not. If we don't pin the CIL first before we check and pin the -object, we have a race with CIL being flushed between the check and the pin -(or not pinning, as the case may be). Hence we must hold the CIL flush/commit -lock to guarantee that we pin the items correctly. - -Delayed Logging: Concurrent Scalability - -A fundamental requirement for the CIL is that accesses through transaction -commits must scale to many concurrent commits. The current transaction commit -code does not break down even when there are transactions coming from 2048 -processors at once. The current transaction code does not go any faster than if -there was only one CPU using it, but it does not slow down either. - -As a result, the delayed logging transaction commit code needs to be designed -for concurrency from the ground up. It is obvious that there are serialisation -points in the design - the three important ones are: - - 1. Locking out new transaction commits while flushing the CIL - 2. Adding items to the CIL and updating item space accounting - 3. Checkpoint commit ordering - -Looking at the transaction commit and CIL flushing interactions, it is clear -that we have a many-to-one interaction here. That is, the only restriction on -the number of concurrent transactions that can be trying to commit at once is -the amount of space available in the log for their reservations. The practical -limit here is in the order of several hundred concurrent transactions for a -128MB log, which means that it is generally one per CPU in a machine. 
- -The amount of time a transaction commit needs to hold out a flush is a -relatively long period of time - the pinning of log items needs to be done -while we are holding out a CIL flush, so at the moment that means it is held -across the formatting of the objects into memory buffers (i.e. while memcpy()s -are in progress). Ultimately a two pass algorithm where the formatting is done -separately to the pinning of objects could be used to reduce the hold time of -the transaction commit side. - -Because of the number of potential transaction commit side holders, the lock -really needs to be a sleeping lock - if the CIL flush takes the lock, we do not -want every other CPU in the machine spinning on the CIL lock. Given that -flushing the CIL could involve walking a list of tens of thousands of log -items, it will get held for a significant time and so spin contention is a -significant concern. Preventing lots of CPUs spinning doing nothing is the -main reason for choosing a sleeping lock even though nothing in either the -transaction commit or CIL flush side sleeps with the lock held. - -It should also be noted that CIL flushing is also a relatively rare operation -compared to transaction commit for asynchronous transaction workloads - only -time will tell if using a read-write semaphore for exclusion will limit -transaction commit concurrency due to cache line bouncing of the lock on the -read side. - -The second serialisation point is on the transaction commit side where items -are inserted into the CIL. Because transactions can enter this code -concurrently, the CIL needs to be protected separately from the above -commit/flush exclusion. It also needs to be an exclusive lock but it is only -held for a very short time and so a spin lock is appropriate here. It is -possible that this lock will become a contention point, but given the short -hold time once per transaction I think that contention is unlikely. 
- -The final serialisation point is the checkpoint commit record ordering code -that is run as part of the checkpoint commit and log force sequencing. The code -path that triggers a CIL flush (i.e. whatever triggers the log force) will enter -an ordering loop after writing all the log vectors into the log buffers but -before writing the commit record. This loop walks the list of committing -checkpoints and needs to block waiting for checkpoints to complete their commit -record write. As a result it needs a lock and a wait variable. Log force -sequencing also requires the same lock, list walk, and blocking mechanism to -ensure completion of checkpoints. - -These two sequencing operations can use the mechanism even though the -events they are waiting for are different. The checkpoint commit record -sequencing needs to wait until checkpoint contexts contain a commit LSN -(obtained through completion of a commit record write) while log force -sequencing needs to wait until previous checkpoint contexts are removed from -the committing list (i.e. they've completed). A simple wait variable and -broadcast wakeups (thundering herds) has been used to implement these two -serialisation queues. They use the same lock as the CIL, too. If we see too -much contention on the CIL lock, or too many context switches as a result of -the broadcast wakeups these operations can be put under a new spinlock and -given separate wait lists to reduce lock contention and the number of processes -woken by the wrong event. - - -Lifecycle Changes - -The existing log item life cycle is as follows: - - 1. Transaction allocate - 2. Transaction reserve - 3. Lock item - 4. Join item to transaction - If not already attached, - Allocate log item - Attach log item to owner item - Attach log item to transaction - 5. Modify item - Record modifications in log item - 6. 
Transaction commit - Pin item in memory - Format item into log buffer - Write commit LSN into transaction - Unlock item - Attach transaction to log buffer - - - - - 7. Transaction completion - Mark log item committed - Insert log item into AIL - Write commit LSN into log item - Unpin log item - 8. AIL traversal - Lock item - Mark log item clean - Flush item to disk - - - - 9. Log item removed from AIL - Moves log tail - Item unlocked - -Essentially, steps 1-6 operate independently from step 7, which is also -independent of steps 8-9. An item can be locked in steps 1-6 or steps 8-9 -at the same time step 7 is occurring, but only steps 1-6 or 8-9 can occur -at the same time. If the log item is in the AIL or between steps 6 and 7 -and steps 1-6 are re-entered, then the item is relogged. Only when steps 8-9 -are entered and completed is the object considered clean. - -With delayed logging, there are new steps inserted into the life cycle: - - 1. Transaction allocate - 2. Transaction reserve - 3. Lock item - 4. Join item to transaction - If not already attached, - Allocate log item - Attach log item to owner item - Attach log item to transaction - 5. Modify item - Record modifications in log item - 6. Transaction commit - Pin item in memory if not pinned in CIL - Format item into log vector + buffer - Attach log vector and buffer to log item - Insert log item into CIL - Write CIL context sequence into transaction - Unlock item - - - - 7. CIL push - lock CIL flush - Chain log vectors and buffers together - Remove items from CIL - unlock CIL flush - write log vectors into log - sequence commit records - attach checkpoint context to log buffer - - - - - 8. Checkpoint completion - Mark log item committed - Insert item into AIL - Write commit LSN into log item - Unpin log item - 9. AIL traversal - Lock item - Mark log item clean - Flush item to disk - - 10. 
Log item removed from AIL - Moves log tail - Item unlocked - -From this, it can be seen that the only life cycle differences between the two -logging methods are in the middle of the life cycle - they still have the same -beginning and end and execution constraints. The only differences are in the -committing of the log items to the log itself and the completion processing. -Hence delayed logging should not introduce any constraints on log item -behaviour, allocation or freeing that don't already exist. - -As a result of this zero-impact "insertion" of delayed logging infrastructure -and the design of the internal structures to avoid on disk format changes, we -can basically switch between delayed logging and the existing mechanism with a -mount option. Fundamentally, there is no reason why the log manager would not -be able to swap methods automatically and transparently depending on load -characteristics, but this should not be necessary if delayed logging works as -designed. diff --git a/MAINTAINERS b/MAINTAINERS index bbf85128ca89..d8f11f8134df 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18533,7 +18533,7 @@ W: http://xfs.org/ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git F: Documentation/ABI/testing/sysfs-fs-xfs F: Documentation/admin-guide/xfs.rst -F: Documentation/filesystems/xfs-delayed-logging-design.txt +F: Documentation/filesystems/xfs-delayed-logging-design.rst F: Documentation/filesystems/xfs-self-describing-metadata.txt F: fs/xfs/ F: include/uapi/linux/dqblk_xfs.h -- cgit v1.2.3 From fc2f6fe745a0fa287f68cb8fba04040aaf73fa28 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:20 +0200 Subject: docs: filesystems: convert xfs-self-describing-metadata.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/7c26b200e12cfc07b9bd379612452d845a8d1474.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + .../filesystems/xfs-self-describing-metadata.rst | 352 +++++++++++++++++++++ .../filesystems/xfs-self-describing-metadata.txt | 350 -------------------- MAINTAINERS | 2 +- 4 files changed, 354 insertions(+), 351 deletions(-) create mode 100644 Documentation/filesystems/xfs-self-describing-metadata.rst delete mode 100644 Documentation/filesystems/xfs-self-describing-metadata.txt diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 0fd90f854b33..e242cb75f9f2 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -118,4 +118,5 @@ Documentation for filesystem implementations. virtiofs vfat xfs-delayed-logging-design + xfs-self-describing-metadata zonefs diff --git a/Documentation/filesystems/xfs-self-describing-metadata.rst b/Documentation/filesystems/xfs-self-describing-metadata.rst new file mode 100644 index 000000000000..51cdafe01ab1 --- /dev/null +++ b/Documentation/filesystems/xfs-self-describing-metadata.rst @@ -0,0 +1,352 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +XFS Self Describing Metadata +============================ + +Introduction +============ + +The largest scalability problem facing XFS is not one of algorithmic +scalability, but of verification of the filesystem structure. Scalabilty of the +structures and indexes on disk and the algorithms for iterating them are +adequate for supporting PB scale filesystems with billions of inodes, however it +is this very scalability that causes the verification problem. + +Almost all metadata on XFS is dynamically allocated. 
The only fixed location +metadata is the allocation group headers (SB, AGF, AGFL and AGI), while all +other metadata structures need to be discovered by walking the filesystem +structure in different ways. While this is already done by userspace tools for +validating and repairing the structure, there are limits to what they can +verify, and this in turn limits the supportable size of an XFS filesystem. + +For example, it is entirely possible to manually use xfs_db and a bit of +scripting to analyse the structure of a 100TB filesystem when trying to +determine the root cause of a corruption problem, but it is still mainly a +manual task of verifying that things like single bit errors or misplaced writes +weren't the ultimate cause of a corruption event. It may take a few hours to a +few days to perform such forensic analysis, so at this scale root cause +analysis is entirely possible. + +However, if we scale the filesystem up to 1PB, we now have 10x as much metadata +to analyse and so that analysis blows out towards weeks/months of forensic work. +Most of the analysis work is slow and tedious, so as the amount of analysis goes +up, the more likely that the cause will be lost in the noise. Hence the primary +concern for supporting PB scale filesystems is minimising the time and effort +required for basic forensic analysis of the filesystem structure. + + +Self Describing Metadata +======================== + +One of the problems with the current metadata format is that apart from the +magic number in the metadata block, we have no other way of identifying what it +is supposed to be. We can't even identify if it is the right place. Put simply, +you can't look at a single metadata block in isolation and say "yes, it is +supposed to be there and the contents are valid". 
+ +Hence most of the time spent on forensic analysis is spent doing basic +verification of metadata values, looking for values that are in range (and hence +not detected by automated verification checks) but are not correct. Finding and +understanding how things like cross linked block lists (e.g. sibling +pointers in a btree end up with loops in them) are the key to understanding what +went wrong, but it is impossible to tell what order the blocks were linked into +each other or written to disk after the fact. + +Hence we need to record more information into the metadata to allow us to +quickly determine if the metadata is intact and can be ignored for the purpose +of analysis. We can't protect against every possible type of error, but we can +ensure that common types of errors are easily detectable. Hence the concept of +self describing metadata. + +The first, fundamental requirement of self describing metadata is that the +metadata object contains some form of unique identifier in a well known +location. This allows us to identify the expected contents of the block and +hence parse and verify the metadata object. IF we can't independently identify +the type of metadata in the object, then the metadata doesn't describe itself +very well at all! + +Luckily, almost all XFS metadata has magic numbers embedded already - only the +AGFL, remote symlinks and remote attribute blocks do not contain identifying +magic numbers. Hence we can change the on-disk format of all these objects to +add more identifying information and detect this simply by changing the magic +numbers in the metadata objects. That is, if it has the current magic number, +the metadata isn't self identifying. If it contains a new magic number, it is +self identifying and we can do much more expansive automated verification of the +metadata object at runtime, during forensic analysis or repair. + +As a primary concern, self describing metadata needs some form of overall +integrity checking. 
We cannot trust the metadata if we cannot verify that it has +not been changed as a result of external influences. Hence we need some form of +integrity check, and this is done by adding CRC32c validation to the metadata +block. If we can verify the block contains the metadata it was intended to +contain, a large amount of the manual verification work can be skipped. + +CRC32c was selected as metadata cannot be more than 64k in length in XFS and +hence a 32 bit CRC is more than sufficient to detect multi-bit errors in +metadata blocks. CRC32c is also now hardware accelerated on common CPUs so it is +fast. So while CRC32c is not the strongest of possible integrity checks that +could be used, it is more than sufficient for our needs and has relatively +little overhead. Adding support for larger integrity fields and/or algorithms +does not really provide any extra value over CRC32c, but it does add a lot of +complexity and so there is no provision for changing the integrity checking +mechanism. + +Self describing metadata needs to contain enough information so that the +metadata block can be verified as being in the correct place without needing to +look at any other metadata. This means it needs to contain location information. +Just adding a block number to the metadata is not sufficient to protect against +mis-directed writes - a write might be misdirected to the wrong LUN and so be +written to the "correct block" of the wrong filesystem. Hence location +information must contain a filesystem identifier as well as a block number. + +Another key information point in forensic analysis is knowing who the metadata +block belongs to. We already know the type, the location, that it is valid +and/or corrupted, and how long ago that it was last modified. Knowing the owner +of the block is important as it allows us to find other related metadata to +determine the scope of the corruption. 
For example, if we have a extent btree +object, we don't know what inode it belongs to and hence have to walk the entire +filesystem to find the owner of the block. Worse, the corruption could mean that +no owner can be found (i.e. it's an orphan block), and so without an owner field +in the metadata we have no idea of the scope of the corruption. If we have an +owner field in the metadata object, we can immediately do top down validation to +determine the scope of the problem. + +Different types of metadata have different owner identifiers. For example, +directory, attribute and extent tree blocks are all owned by an inode, while +freespace btree blocks are owned by an allocation group. Hence the size and +contents of the owner field are determined by the type of metadata object we are +looking at. The owner information can also identify misplaced writes (e.g. +freespace btree block written to the wrong AG). + +Self describing metadata also needs to contain some indication of when it was +written to the filesystem. One of the key information points when doing forensic +analysis is how recently the block was modified. Correlation of set of corrupted +metadata blocks based on modification times is important as it can indicate +whether the corruptions are related, whether there's been multiple corruption +events that lead to the eventual failure, and even whether there are corruptions +present that the run-time verification is not detecting. + +For example, we can determine whether a metadata object is supposed to be free +space or still allocated if it is still referenced by its owner by looking at +when the free space btree block that contains the block was last written +compared to when the metadata object itself was last written. If the free space +block is more recent than the object and the object's owner, then there is a +very good chance that the block should have been removed from the owner. 
+ +To provide this "written timestamp", each metadata block gets the Log Sequence +Number (LSN) of the most recent transaction it was modified on written into it. +This number will always increase over the life of the filesystem, and the only +thing that resets it is running xfs_repair on the filesystem. Further, by use of +the LSN we can tell if the corrupted metadata all belonged to the same log +checkpoint and hence have some idea of how much modification occurred between +the first and last instance of corrupt metadata on disk and, further, how much +modification occurred between the corruption being written and when it was +detected. + +Runtime Validation +================== + +Validation of self-describing metadata takes place at runtime in two places: + + - immediately after a successful read from disk + - immediately prior to write IO submission + +The verification is completely stateless - it is done independently of the +modification process, and seeks only to check that the metadata is what it says +it is and that the metadata fields are within bounds and internally consistent. +As such, we cannot catch all types of corruption that can occur within a block +as there may be certain limitations that operational state enforces of the +metadata, or there may be corruption of interblock relationships (e.g. corrupted +sibling pointer lists). Hence we still need stateful checking in the main code +body, but in general most of the per-field validation is handled by the +verifiers. + +For read verification, the caller needs to specify the expected type of metadata +that it should see, and the IO completion process verifies that the metadata +object matches what was expected. If the verification process fails, then it +marks the object being read as EFSCORRUPTED. The caller needs to catch this +error (same as for IO errors), and if it needs to take special action due to a +verification error it can do so by catching the EFSCORRUPTED error value. 
If we +need more discrimination of error type at higher levels, we can define new +error numbers for different errors as necessary. + +The first step in read verification is checking the magic number and determining +whether CRC validation is necessary. If it is, the CRC32c is calculated and +compared against the value stored in the object itself. Once this is validated, +further checks are made against the location information, followed by extensive +object specific metadata validation. If any of these checks fail, then the +buffer is considered corrupt and the EFSCORRUPTED error is set appropriately. + +Write verification is the opposite of the read verification - first the object +is extensively verified and if it is OK we then update the LSN from the last +modification made to the object. After this, we calculate the CRC and insert it +into the object. Once this is done the write IO is allowed to continue. If any +error occurs during this process, the buffer is again marked with an EFSCORRUPTED +error for the higher layers to catch. + +Structures +========== + +A typical on-disk structure needs to contain the following information:: + + struct xfs_ondisk_hdr { + __be32 magic; /* magic number */ + __be32 crc; /* CRC, not logged */ + uuid_t uuid; /* filesystem identifier */ + __be64 owner; /* parent object */ + __be64 blkno; /* location on disk */ + __be64 lsn; /* last modification in log, not logged */ + }; + +Depending on the metadata, this information may be part of a header structure +separate to the metadata contents, or may be distributed through an existing +structure. The latter occurs with metadata that already contains some of this +information, such as the superblock and AG headers. + +Other metadata may have different formats for the information, but the same +level of information is generally provided. For example: + + - short btree blocks have a 32 bit owner (ag number) and a 32 bit block + number for location. 
The two of these combined provide the same + information as @owner and @blkno in eh above structure, but using 8 + bytes less space on disk. + + - directory/attribute node blocks have a 16 bit magic number, and the + header that contains the magic number has other information in it as + well. hence the additional metadata headers change the overall format + of the metadata. + +A typical buffer read verifier is structured as follows:: + + #define XFS_FOO_CRC_OFF offsetof(struct xfs_ondisk_hdr, crc) + + static void + xfs_foo_read_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_mount; + + if ((xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), + XFS_FOO_CRC_OFF)) || + !xfs_foo_verify(bp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } + } + +The code ensures that the CRC is only checked if the filesystem has CRCs enabled +by checking the superblock of the feature bit, and then if the CRC verifies OK +(or is not needed) it verifies the actual contents of the block. + +The verifier function will take a couple of different forms, depending on +whether the magic number can be used to determine the format of the block. 
In +the case it can't, the code is structured as follows:: + + static bool + xfs_foo_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_mount; + struct xfs_ondisk_hdr *hdr = bp->b_addr; + + if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC)) + return false; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) { + if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid)) + return false; + if (bp->b_bn != be64_to_cpu(hdr->blkno)) + return false; + if (hdr->owner == 0) + return false; + } + + /* object specific verification checks here */ + + return true; + } + +If there are different magic numbers for the different formats, the verifier +will look like:: + + static bool + xfs_foo_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_mount; + struct xfs_ondisk_hdr *hdr = bp->b_addr; + + if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) { + if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid)) + return false; + if (bp->b_bn != be64_to_cpu(hdr->blkno)) + return false; + if (hdr->owner == 0) + return false; + } else if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC)) + return false; + + /* object specific verification checks here */ + + return true; + } + +Write verifiers are very similar to the read verifiers, they just do things in +the opposite order to the read verifiers. 
A typical write verifier:: + + static void + xfs_foo_write_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_foo_verify(bp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + + if (bip) { + struct xfs_ondisk_hdr *hdr = bp->b_addr; + hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn); + } + xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF); + } + +This will verify the internal structure of the metadata before we go any +further, detecting corruptions that have occurred as the metadata has been +modified in memory. If the metadata verifies OK, and CRCs are enabled, we then +update the LSN field (when it was last modified) and calculate the CRC on the +metadata. Once this is done, we can issue the IO. + +Inodes and Dquots +================= + +Inodes and dquots are special snowflakes. They have per-object CRC and +self-identifiers, but they are packed so that there are multiple objects per +buffer. Hence we do not use per-buffer verifiers to do the work of per-object +verification and CRC calculations. The per-buffer verifiers simply perform basic +identification of the buffer - that they contain inodes or dquots, and that +there are magic numbers in all the expected spots. All further CRC and +verification checks are done when each inode is read from or written back to the +buffer. + +The structure of the verifiers and the identifiers checks is very similar to the +buffer code described above. The only difference is where they are called. For +example, inode read verification is done in xfs_iread() when the inode is first +read out of the buffer and the struct xfs_inode is instantiated. 
The inode is +already extensively verified during writeback in xfs_iflush_int, so the only +addition here is to add the LSN and CRC to the inode as it is copied back into +the buffer. + +XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of +the unlinked list modifications check or update CRCs, neither during unlink nor +log recovery. So, it's gone unnoticed until now. This won't matter immediately - +repair will probably complain about it - but it needs to be fixed. diff --git a/Documentation/filesystems/xfs-self-describing-metadata.txt b/Documentation/filesystems/xfs-self-describing-metadata.txt deleted file mode 100644 index 8db0121d0980..000000000000 --- a/Documentation/filesystems/xfs-self-describing-metadata.txt +++ /dev/null @@ -1,350 +0,0 @@ -XFS Self Describing Metadata ----------------------------- - -Introduction ------------- - -The largest scalability problem facing XFS is not one of algorithmic -scalability, but of verification of the filesystem structure. Scalabilty of the -structures and indexes on disk and the algorithms for iterating them are -adequate for supporting PB scale filesystems with billions of inodes, however it -is this very scalability that causes the verification problem. - -Almost all metadata on XFS is dynamically allocated. The only fixed location -metadata is the allocation group headers (SB, AGF, AGFL and AGI), while all -other metadata structures need to be discovered by walking the filesystem -structure in different ways. While this is already done by userspace tools for -validating and repairing the structure, there are limits to what they can -verify, and this in turn limits the supportable size of an XFS filesystem. 
- -For example, it is entirely possible to manually use xfs_db and a bit of -scripting to analyse the structure of a 100TB filesystem when trying to -determine the root cause of a corruption problem, but it is still mainly a -manual task of verifying that things like single bit errors or misplaced writes -weren't the ultimate cause of a corruption event. It may take a few hours to a -few days to perform such forensic analysis, so for at this scale root cause -analysis is entirely possible. - -However, if we scale the filesystem up to 1PB, we now have 10x as much metadata -to analyse and so that analysis blows out towards weeks/months of forensic work. -Most of the analysis work is slow and tedious, so as the amount of analysis goes -up, the more likely that the cause will be lost in the noise. Hence the primary -concern for supporting PB scale filesystems is minimising the time and effort -required for basic forensic analysis of the filesystem structure. - - -Self Describing Metadata ------------------------- - -One of the problems with the current metadata format is that apart from the -magic number in the metadata block, we have no other way of identifying what it -is supposed to be. We can't even identify if it is the right place. Put simply, -you can't look at a single metadata block in isolation and say "yes, it is -supposed to be there and the contents are valid". - -Hence most of the time spent on forensic analysis is spent doing basic -verification of metadata values, looking for values that are in range (and hence -not detected by automated verification checks) but are not correct. Finding and -understanding how things like cross linked block lists (e.g. sibling -pointers in a btree end up with loops in them) are the key to understanding what -went wrong, but it is impossible to tell what order the blocks were linked into -each other or written to disk after the fact. 
- -Hence we need to record more information into the metadata to allow us to -quickly determine if the metadata is intact and can be ignored for the purpose -of analysis. We can't protect against every possible type of error, but we can -ensure that common types of errors are easily detectable. Hence the concept of -self describing metadata. - -The first, fundamental requirement of self describing metadata is that the -metadata object contains some form of unique identifier in a well known -location. This allows us to identify the expected contents of the block and -hence parse and verify the metadata object. IF we can't independently identify -the type of metadata in the object, then the metadata doesn't describe itself -very well at all! - -Luckily, almost all XFS metadata has magic numbers embedded already - only the -AGFL, remote symlinks and remote attribute blocks do not contain identifying -magic numbers. Hence we can change the on-disk format of all these objects to -add more identifying information and detect this simply by changing the magic -numbers in the metadata objects. That is, if it has the current magic number, -the metadata isn't self identifying. If it contains a new magic number, it is -self identifying and we can do much more expansive automated verification of the -metadata object at runtime, during forensic analysis or repair. - -As a primary concern, self describing metadata needs some form of overall -integrity checking. We cannot trust the metadata if we cannot verify that it has -not been changed as a result of external influences. Hence we need some form of -integrity check, and this is done by adding CRC32c validation to the metadata -block. If we can verify the block contains the metadata it was intended to -contain, a large amount of the manual verification work can be skipped. 
- -CRC32c was selected as metadata cannot be more than 64k in length in XFS and -hence a 32 bit CRC is more than sufficient to detect multi-bit errors in -metadata blocks. CRC32c is also now hardware accelerated on common CPUs so it is -fast. So while CRC32c is not the strongest of possible integrity checks that -could be used, it is more than sufficient for our needs and has relatively -little overhead. Adding support for larger integrity fields and/or algorithms -does really provide any extra value over CRC32c, but it does add a lot of -complexity and so there is no provision for changing the integrity checking -mechanism. - -Self describing metadata needs to contain enough information so that the -metadata block can be verified as being in the correct place without needing to -look at any other metadata. This means it needs to contain location information. -Just adding a block number to the metadata is not sufficient to protect against -mis-directed writes - a write might be misdirected to the wrong LUN and so be -written to the "correct block" of the wrong filesystem. Hence location -information must contain a filesystem identifier as well as a block number. - -Another key information point in forensic analysis is knowing who the metadata -block belongs to. We already know the type, the location, that it is valid -and/or corrupted, and how long ago that it was last modified. Knowing the owner -of the block is important as it allows us to find other related metadata to -determine the scope of the corruption. For example, if we have a extent btree -object, we don't know what inode it belongs to and hence have to walk the entire -filesystem to find the owner of the block. Worse, the corruption could mean that -no owner can be found (i.e. it's an orphan block), and so without an owner field -in the metadata we have no idea of the scope of the corruption. 
If we have an -owner field in the metadata object, we can immediately do top down validation to -determine the scope of the problem. - -Different types of metadata have different owner identifiers. For example, -directory, attribute and extent tree blocks are all owned by an inode, while -freespace btree blocks are owned by an allocation group. Hence the size and -contents of the owner field are determined by the type of metadata object we are -looking at. The owner information can also identify misplaced writes (e.g. -freespace btree block written to the wrong AG). - -Self describing metadata also needs to contain some indication of when it was -written to the filesystem. One of the key information points when doing forensic -analysis is how recently the block was modified. Correlation of set of corrupted -metadata blocks based on modification times is important as it can indicate -whether the corruptions are related, whether there's been multiple corruption -events that lead to the eventual failure, and even whether there are corruptions -present that the run-time verification is not detecting. - -For example, we can determine whether a metadata object is supposed to be free -space or still allocated if it is still referenced by its owner by looking at -when the free space btree block that contains the block was last written -compared to when the metadata object itself was last written. If the free space -block is more recent than the object and the object's owner, then there is a -very good chance that the block should have been removed from the owner. - -To provide this "written timestamp", each metadata block gets the Log Sequence -Number (LSN) of the most recent transaction it was modified on written into it. -This number will always increase over the life of the filesystem, and the only -thing that resets it is running xfs_repair on the filesystem. 
Further, by use of -the LSN we can tell if the corrupted metadata all belonged to the same log -checkpoint and hence have some idea of how much modification occurred between -the first and last instance of corrupt metadata on disk and, further, how much -modification occurred between the corruption being written and when it was -detected. - -Runtime Validation ------------------- - -Validation of self-describing metadata takes place at runtime in two places: - - - immediately after a successful read from disk - - immediately prior to write IO submission - -The verification is completely stateless - it is done independently of the -modification process, and seeks only to check that the metadata is what it says -it is and that the metadata fields are within bounds and internally consistent. -As such, we cannot catch all types of corruption that can occur within a block -as there may be certain limitations that operational state enforces of the -metadata, or there may be corruption of interblock relationships (e.g. corrupted -sibling pointer lists). Hence we still need stateful checking in the main code -body, but in general most of the per-field validation is handled by the -verifiers. - -For read verification, the caller needs to specify the expected type of metadata -that it should see, and the IO completion process verifies that the metadata -object matches what was expected. If the verification process fails, then it -marks the object being read as EFSCORRUPTED. The caller needs to catch this -error (same as for IO errors), and if it needs to take special action due to a -verification error it can do so by catching the EFSCORRUPTED error value. If we -need more discrimination of error type at higher levels, we can define new -error numbers for different errors as necessary. - -The first step in read verification is checking the magic number and determining -whether CRC validating is necessary. 
If it is, the CRC32c is calculated and -compared against the value stored in the object itself. Once this is validated, -further checks are made against the location information, followed by extensive -object specific metadata validation. If any of these checks fail, then the -buffer is considered corrupt and the EFSCORRUPTED error is set appropriately. - -Write verification is the opposite of the read verification - first the object -is extensively verified and if it is OK we then update the LSN from the last -modification made to the object, After this, we calculate the CRC and insert it -into the object. Once this is done the write IO is allowed to continue. If any -error occurs during this process, the buffer is again marked with a EFSCORRUPTED -error for the higher layers to catch. - -Structures ----------- - -A typical on-disk structure needs to contain the following information: - -struct xfs_ondisk_hdr { - __be32 magic; /* magic number */ - __be32 crc; /* CRC, not logged */ - uuid_t uuid; /* filesystem identifier */ - __be64 owner; /* parent object */ - __be64 blkno; /* location on disk */ - __be64 lsn; /* last modification in log, not logged */ -}; - -Depending on the metadata, this information may be part of a header structure -separate to the metadata contents, or may be distributed through an existing -structure. The latter occurs with metadata that already contains some of this -information, such as the superblock and AG headers. - -Other metadata may have different formats for the information, but the same -level of information is generally provided. For example: - - - short btree blocks have a 32 bit owner (ag number) and a 32 bit block - number for location. The two of these combined provide the same - information as @owner and @blkno in eh above structure, but using 8 - bytes less space on disk. - - - directory/attribute node blocks have a 16 bit magic number, and the - header that contains the magic number has other information in it as - well. 
hence the additional metadata headers change the overall format - of the metadata. - -A typical buffer read verifier is structured as follows: - -#define XFS_FOO_CRC_OFF offsetof(struct xfs_ondisk_hdr, crc) - -static void -xfs_foo_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_mount; - - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_FOO_CRC_OFF)) || - !xfs_foo_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } -} - -The code ensures that the CRC is only checked if the filesystem has CRCs enabled -by checking the superblock of the feature bit, and then if the CRC verifies OK -(or is not needed) it verifies the actual contents of the block. - -The verifier function will take a couple of different forms, depending on -whether the magic number can be used to determine the format of the block. In -the case it can't, the code is structured as follows: - -static bool -xfs_foo_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_mount; - struct xfs_ondisk_hdr *hdr = bp->b_addr; - - if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC)) - return false; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) { - if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid)) - return false; - if (bp->b_bn != be64_to_cpu(hdr->blkno)) - return false; - if (hdr->owner == 0) - return false; - } - - /* object specific verification checks here */ - - return true; -} - -If there are different magic numbers for the different formats, the verifier -will look like: - -static bool -xfs_foo_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_mount; - struct xfs_ondisk_hdr *hdr = bp->b_addr; - - if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) { - if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid)) - return false; - if (bp->b_bn != be64_to_cpu(hdr->blkno)) - return false; - if (hdr->owner == 0) - return false; - } else if (hdr->magic != 
cpu_to_be32(XFS_FOO_MAGIC)) - return false; - - /* object specific verification checks here */ - - return true; -} - -Write verifiers are very similar to the read verifiers, they just do things in -the opposite order to the read verifiers. A typical write verifier: - -static void -xfs_foo_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_mount; - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_foo_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - return; - } - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - - if (bip) { - struct xfs_ondisk_hdr *hdr = bp->b_addr; - hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn); - } - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF); -} - -This will verify the internal structure of the metadata before we go any -further, detecting corruptions that have occurred as the metadata has been -modified in memory. If the metadata verifies OK, and CRCs are enabled, we then -update the LSN field (when it was last modified) and calculate the CRC on the -metadata. Once this is done, we can issue the IO. - -Inodes and Dquots ------------------ - -Inodes and dquots are special snowflakes. They have per-object CRC and -self-identifiers, but they are packed so that there are multiple objects per -buffer. Hence we do not use per-buffer verifiers to do the work of per-object -verification and CRC calculations. The per-buffer verifiers simply perform basic -identification of the buffer - that they contain inodes or dquots, and that -there are magic numbers in all the expected spots. All further CRC and -verification checks are done when each inode is read from or written back to the -buffer. - -The structure of the verifiers and the identifiers checks is very similar to the -buffer code described above. The only difference is where they are called. 
For -example, inode read verification is done in xfs_iread() when the inode is first -read out of the buffer and the struct xfs_inode is instantiated. The inode is -already extensively verified during writeback in xfs_iflush_int, so the only -addition here is to add the LSN and CRC to the inode as it is copied back into -the buffer. - -XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of -the unlinked list modifications check or update CRCs, neither during unlink nor -log recovery. So, it's gone unnoticed until now. This won't matter immediately - -repair will probably complain about it - but it needs to be fixed. - diff --git a/MAINTAINERS b/MAINTAINERS index d8f11f8134df..ffd14379f81e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18534,7 +18534,7 @@ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git F: Documentation/ABI/testing/sysfs-fs-xfs F: Documentation/admin-guide/xfs.rst F: Documentation/filesystems/xfs-delayed-logging-design.rst -F: Documentation/filesystems/xfs-self-describing-metadata.txt +F: Documentation/filesystems/xfs-self-describing-metadata.rst F: fs/xfs/ F: include/uapi/linux/dqblk_xfs.h F: include/uapi/linux/fsmap.h -- cgit v1.2.3 From 982649915d626bba8753c04e994e5a6650523c64 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 27 Apr 2020 23:17:21 +0200 Subject: docs: filesystems: convert configfs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Use copyright symbol; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Also, as this file is alone on its own dir, and it doesn't seem too likely that other documents will follow it, let's move it to the filesystems/ root documentation dir. 
Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c2424ec2ad4d735751434ff7f52144c44aa02d5a.1588021877.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/configfs.rst | 535 ++++++++++++++++++++++++ Documentation/filesystems/configfs/configfs.txt | 508 ---------------------- Documentation/filesystems/index.rst | 1 + Documentation/iio/iio_configfs.rst | 2 +- Documentation/usb/gadget_configfs.rst | 4 +- fs/configfs/inode.c | 2 +- fs/configfs/item.c | 2 +- include/linux/configfs.h | 2 +- 8 files changed, 542 insertions(+), 514 deletions(-) create mode 100644 Documentation/filesystems/configfs.rst delete mode 100644 Documentation/filesystems/configfs/configfs.txt diff --git a/Documentation/filesystems/configfs.rst b/Documentation/filesystems/configfs.rst new file mode 100644 index 000000000000..f8941954c667 --- /dev/null +++ b/Documentation/filesystems/configfs.rst @@ -0,0 +1,535 @@ +======================================================= +Configfs - Userspace-driven Kernel Object Configuration +======================================================= + +Joel Becker + +Updated: 31 March 2005 + +Copyright (c) 2005 Oracle Corporation, + Joel Becker + + +What is configfs? +================= + +configfs is a ram-based filesystem that provides the converse of +sysfs's functionality. Where sysfs is a filesystem-based view of +kernel objects, configfs is a filesystem-based manager of kernel +objects, or config_items. + +With sysfs, an object is created in kernel (for example, when a device +is discovered) and it is registered with sysfs. Its attributes then +appear in sysfs, allowing userspace to read the attributes via +readdir(3)/read(2). It may allow some attributes to be modified via +write(2). The important point is that the object is created and +destroyed in kernel, the kernel controls the lifecycle of the sysfs +representation, and sysfs is merely a window on all this. 
+
+A configfs config_item is created via an explicit userspace operation:
+mkdir(2). It is destroyed via rmdir(2). The attributes appear at
+mkdir(2) time, and can be read or modified via read(2) and write(2).
+As with sysfs, readdir(3) queries the list of items and/or attributes.
+symlink(2) can be used to group items together. Unlike sysfs, the
+lifetime of the representation is completely driven by userspace. The
+kernel modules backing the items must respond to this.
+
+Both sysfs and configfs can and should exist together on the same
+system. One is not a replacement for the other.
+
+Using configfs
+==============
+
+configfs can be compiled as a module or into the kernel. You can access
+it by doing::
+
+	mount -t configfs none /config
+
+The configfs tree will be empty unless client modules are also loaded.
+These are modules that register their item types with configfs as
+subsystems. Once a client subsystem is loaded, it will appear as a
+subdirectory (or more than one) under /config. Like sysfs, the
+configfs tree is always there, whether mounted on /config or not.
+
+An item is created via mkdir(2). The item's attributes will also
+appear at this time. readdir(3) can determine what the attributes are,
+read(2) can query their default values, and write(2) can store new
+values. Don't mix more than one attribute in one attribute file.
+
+There are two types of configfs attributes:
+
+* Normal attributes, which, similar to sysfs attributes, are small ASCII text
+  files, with a maximum size of one page (PAGE_SIZE, 4096 on i386). Preferably
+  only one value per file should be used, and the same caveats from sysfs apply.
+  Configfs expects write(2) to store the entire buffer at once. When writing to
+  normal configfs attributes, userspace processes should first read the entire
+  file, modify the portions they wish to change, and then write the entire
+  buffer back. 
+
+* Binary attributes, which are somewhat similar to sysfs binary attributes,
+  but with a few slight changes to semantics. The PAGE_SIZE limitation does not
+  apply, but the whole binary item must fit in a single kernel vmalloc'ed buffer.
+  The write(2) calls from user space are buffered, and the attributes'
+  write_bin_attribute method will be invoked on the final close, therefore it is
+  imperative for user-space to check the return code of close(2) in order to
+  verify that the operation finished successfully.
+  To avoid a malicious user OOMing the kernel, there's a per-binary attribute
+  maximum buffer value.
+
+When an item needs to be destroyed, remove it with rmdir(2). An
+item cannot be destroyed if any other item has a link to it (via
+symlink(2)). Links can be removed via unlink(2).
+
+Configuring FakeNBD: an Example
+===============================
+
+Imagine there's a Network Block Device (NBD) driver that allows you to
+access remote block devices. Call it FakeNBD. FakeNBD uses configfs
+for its configuration. Obviously, there will be a nice program that
+sysadmins use to configure FakeNBD, but somehow that program has to tell
+the driver about it. Here's where configfs comes in.
+
+When the FakeNBD driver is loaded, it registers itself with configfs.
+readdir(3) sees this just fine::
+
+	# ls /config
+	fakenbd
+
+A fakenbd connection can be created with mkdir(2). The name is
+arbitrary, but likely the tool will make some use of the name. Perhaps
+it is a uuid or a disk name::
+
+	# mkdir /config/fakenbd/disk1
+	# ls /config/fakenbd/disk1
+	target device rw
+
+The target attribute contains the IP address of the server FakeNBD will
+connect to. The device attribute is the device on the server.
+Predictably, the rw attribute determines whether the connection is
+read-only or read-write::
+
+	# echo 10.0.0.1 > /config/fakenbd/disk1/target
+	# echo /dev/sda1 > /config/fakenbd/disk1/device
+	# echo 1 > /config/fakenbd/disk1/rw
+
+That's it. 
That's all there is. Now the device is configured, via the +shell no less. + +Coding With configfs +==================== + +Every object in configfs is a config_item. A config_item reflects an +object in the subsystem. It has attributes that match values on that +object. configfs handles the filesystem representation of that object +and its attributes, allowing the subsystem to ignore all but the +basic show/store interaction. + +Items are created and destroyed inside a config_group. A group is a +collection of items that share the same attributes and operations. +Items are created by mkdir(2) and removed by rmdir(2), but configfs +handles that. The group has a set of operations to perform these tasks + +A subsystem is the top level of a client module. During initialization, +the client module registers the subsystem with configfs, the subsystem +appears as a directory at the top of the configfs filesystem. A +subsystem is also a config_group, and can do everything a config_group +can. + +struct config_item +================== + +:: + + struct config_item { + char *ci_name; + char ci_namebuf[UOBJ_NAME_LEN]; + struct kref ci_kref; + struct list_head ci_entry; + struct config_item *ci_parent; + struct config_group *ci_group; + struct config_item_type *ci_type; + struct dentry *ci_dentry; + }; + + void config_item_init(struct config_item *); + void config_item_init_type_name(struct config_item *, + const char *name, + struct config_item_type *type); + struct config_item *config_item_get(struct config_item *); + void config_item_put(struct config_item *); + +Generally, struct config_item is embedded in a container structure, a +structure that actually represents what the subsystem is doing. The +config_item portion of that structure is how the object interacts with +configfs. + +Whether statically defined in a source file or created by a parent +config_group, a config_item must have one of the _init() functions +called on it. 
This initializes the reference count and sets up the +appropriate fields. + +All users of a config_item should have a reference on it via +config_item_get(), and drop the reference when they are done via +config_item_put(). + +By itself, a config_item cannot do much more than appear in configfs. +Usually a subsystem wants the item to display and/or store attributes, +among other things. For that, it needs a type. + +struct config_item_type +======================= + +:: + + struct configfs_item_operations { + void (*release)(struct config_item *); + int (*allow_link)(struct config_item *src, + struct config_item *target); + void (*drop_link)(struct config_item *src, + struct config_item *target); + }; + + struct config_item_type { + struct module *ct_owner; + struct configfs_item_operations *ct_item_ops; + struct configfs_group_operations *ct_group_ops; + struct configfs_attribute **ct_attrs; + struct configfs_bin_attribute **ct_bin_attrs; + }; + +The most basic function of a config_item_type is to define what +operations can be performed on a config_item. All items that have been +allocated dynamically will need to provide the ct_item_ops->release() +method. This method is called when the config_item's reference count +reaches zero. + +struct configfs_attribute +========================= + +:: + + struct configfs_attribute { + char *ca_name; + struct module *ca_owner; + umode_t ca_mode; + ssize_t (*show)(struct config_item *, char *); + ssize_t (*store)(struct config_item *, const char *, size_t); + }; + +When a config_item wants an attribute to appear as a file in the item's +configfs directory, it must define a configfs_attribute describing it. +It then adds the attribute to the NULL-terminated array +config_item_type->ct_attrs. When the item appears in configfs, the +attribute file will appear with the configfs_attribute->ca_name +filename. configfs_attribute->ca_mode specifies the file permissions. 
+
+If an attribute is readable and provides a ->show method, that method will
+be called whenever userspace asks for a read(2) on the attribute. If an
+attribute is writable and provides a ->store method, that method will
+be called whenever userspace asks for a write(2) on the attribute.
+
+struct configfs_bin_attribute
+=============================
+
+::
+
+  struct configfs_bin_attribute {
+         struct configfs_attribute    cb_attr;
+         void                         *cb_private;
+         size_t                       cb_max_size;
+  };
+
+The binary attribute is used when one needs to use a binary blob to
+appear as the contents of a file in the item's configfs directory.
+To do so add the binary attribute to the NULL-terminated array
+config_item_type->ct_bin_attrs, and when the item appears in configfs, the
+attribute file will appear with the configfs_bin_attribute->cb_attr.ca_name
+filename. configfs_bin_attribute->cb_attr.ca_mode specifies the file
+permissions.
+The cb_private member is provided for use by the driver, while the
+cb_max_size member specifies the maximum amount of vmalloc buffer
+to be used.
+
+If a binary attribute is readable and the config_item provides a
+ct_item_ops->read_bin_attribute() method, that method will be called
+whenever userspace asks for a read(2) on the attribute. The converse
+will happen for write(2). The reads/writes are buffered so only a
+single read/write will occur; the attribute need not concern itself
+with it.
+
+struct config_group
+===================
+
+A config_item cannot live in a vacuum. The only way one can be created
+is via mkdir(2) on a config_group. 
This will trigger creation of a +child item:: + + struct config_group { + struct config_item cg_item; + struct list_head cg_children; + struct configfs_subsystem *cg_subsys; + struct list_head default_groups; + struct list_head group_entry; + }; + + void config_group_init(struct config_group *group); + void config_group_init_type_name(struct config_group *group, + const char *name, + struct config_item_type *type); + + +The config_group structure contains a config_item. Properly configuring +that item means that a group can behave as an item in its own right. +However, it can do more: it can create child items or groups. This is +accomplished via the group operations specified on the group's +config_item_type:: + + struct configfs_group_operations { + struct config_item *(*make_item)(struct config_group *group, + const char *name); + struct config_group *(*make_group)(struct config_group *group, + const char *name); + int (*commit_item)(struct config_item *item); + void (*disconnect_notify)(struct config_group *group, + struct config_item *item); + void (*drop_item)(struct config_group *group, + struct config_item *item); + }; + +A group creates child items by providing the +ct_group_ops->make_item() method. If provided, this method is called from +mkdir(2) in the group's directory. The subsystem allocates a new +config_item (or more likely, its container structure), initializes it, +and returns it to configfs. Configfs will then populate the filesystem +tree to reflect the new item. + +If the subsystem wants the child to be a group itself, the subsystem +provides ct_group_ops->make_group(). Everything else behaves the same, +using the group _init() functions on the group. + +Finally, when userspace calls rmdir(2) on the item or group, +ct_group_ops->drop_item() is called. As a config_group is also a +config_item, it is not necessary for a separate drop_group() method. +The subsystem must config_item_put() the reference that was initialized +upon item allocation. 
If a subsystem has no work to do, it may omit +the ct_group_ops->drop_item() method, and configfs will call +config_item_put() on the item on behalf of the subsystem. + +Important: + drop_item() is void, and as such cannot fail. When rmdir(2) + is called, configfs WILL remove the item from the filesystem tree + (assuming that it has no children to keep it busy). The subsystem is + responsible for responding to this. If the subsystem has references to + the item in other threads, the memory is safe. It may take some time + for the item to actually disappear from the subsystem's usage. But it + is gone from configfs. + +When drop_item() is called, the item's linkage has already been torn +down. It no longer has a reference on its parent and has no place in +the item hierarchy. If a client needs to do some cleanup before this +teardown happens, the subsystem can implement the +ct_group_ops->disconnect_notify() method. The method is called after +configfs has removed the item from the filesystem view but before the +item is removed from its parent group. Like drop_item(), +disconnect_notify() is void and cannot fail. Client subsystems should +not drop any references here, as they still must do it in drop_item(). + +A config_group cannot be removed while it still has child items. This +is implemented in the configfs rmdir(2) code. ->drop_item() will not be +called, as the item has not been dropped. rmdir(2) will fail, as the +directory is not empty. + +struct configfs_subsystem +========================= + +A subsystem must register itself, usually at module_init time. This +tells configfs to make the subsystem appear in the file tree:: + + struct configfs_subsystem { + struct config_group su_group; + struct mutex su_mutex; + }; + + int configfs_register_subsystem(struct configfs_subsystem *subsys); + void configfs_unregister_subsystem(struct configfs_subsystem *subsys); + +A subsystem consists of a toplevel config_group and a mutex. 
+The group is where child config_items are created. For a subsystem, +this group is usually defined statically. Before calling +configfs_register_subsystem(), the subsystem must have initialized the +group via the usual group _init() functions, and it must also have +initialized the mutex. + +When the register call returns, the subsystem is live, and it +will be visible via configfs. At that point, mkdir(2) can be called and +the subsystem must be ready for it. + +An Example +========== + +The best example of these basic concepts is the simple_children +subsystem/group and the simple_child item in +samples/configfs/configfs_sample.c. It shows a trivial object displaying +and storing an attribute, and a simple group creating and destroying +these children. + +Hierarchy Navigation and the Subsystem Mutex +============================================ + +There is an extra bonus that configfs provides. The config_groups and +config_items are arranged in a hierarchy due to the fact that they +appear in a filesystem. A subsystem is NEVER to touch the filesystem +parts, but the subsystem might be interested in this hierarchy. For +this reason, the hierarchy is mirrored via the config_group->cg_children +and config_item->ci_parent structure members. + +A subsystem can navigate the cg_children list and the ci_parent pointer +to see the tree created by the subsystem. This can race with configfs' +management of the hierarchy, so configfs uses the subsystem mutex to +protect modifications. Whenever a subsystem wants to navigate the +hierarchy, it must do so under the protection of the subsystem +mutex. + +A subsystem will be prevented from acquiring the mutex while a newly +allocated item has not been linked into this hierarchy. Similarly, it +will not be able to acquire the mutex while a dropping item has not +yet been unlinked. 
This means that an item's ci_parent pointer will +never be NULL while the item is in configfs, and that an item will only +be in its parent's cg_children list for the same duration. This allows +a subsystem to trust ci_parent and cg_children while they hold the +mutex. + +Item Aggregation Via symlink(2) +=============================== + +configfs provides a simple group via the group->item parent/child +relationship. Often, however, a larger environment requires aggregation +outside of the parent/child connection. This is implemented via +symlink(2). + +A config_item may provide the ct_item_ops->allow_link() and +ct_item_ops->drop_link() methods. If the ->allow_link() method exists, +symlink(2) may be called with the config_item as the source of the link. +These links are only allowed between configfs config_items. Any +symlink(2) attempt outside the configfs filesystem will be denied. + +When symlink(2) is called, the source config_item's ->allow_link() +method is called with itself and a target item. If the source item +allows linking to target item, it returns 0. A source item may wish to +reject a link if it only wants links to a certain type of object (say, +in its own subsystem). + +When unlink(2) is called on the symbolic link, the source item is +notified via the ->drop_link() method. Like the ->drop_item() method, +this is a void function and cannot return failure. The subsystem is +responsible for responding to the change. + +A config_item cannot be removed while it links to any other item, nor +can it be removed while an item links to it. Dangling symlinks are not +allowed in configfs. + +Automatically Created Subgroups +=============================== + +A new config_group may want to have two types of child config_items. +While this could be codified by magic names in ->make_item(), it is much +more explicit to have a method whereby userspace sees this divergence. 
+ +Rather than have a group where some items behave differently than +others, configfs provides a method whereby one or many subgroups are +automatically created inside the parent at its creation. Thus, +mkdir("parent") results in "parent", "parent/subgroup1", up through +"parent/subgroupN". Items of type 1 can now be created in +"parent/subgroup1", and items of type N can be created in +"parent/subgroupN". + +These automatic subgroups, or default groups, do not preclude other +children of the parent group. If ct_group_ops->make_group() exists, +other child groups can be created on the parent group directly. + +A configfs subsystem specifies default groups by adding them using the +configfs_add_default_group() function to the parent config_group +structure. Each added group is populated in the configfs tree at the same +time as the parent group. Similarly, they are removed at the same time +as the parent. No extra notification is provided. When a ->drop_item() +method call notifies the subsystem the parent group is going away, it +also means every default group child associated with that parent group. + +As a consequence of this, default groups cannot be removed directly via +rmdir(2). They also are not considered when rmdir(2) on the parent +group is checking for children. + +Dependent Subsystems +==================== + +Sometimes other drivers depend on particular configfs items. For +example, ocfs2 mounts depend on a heartbeat region item. If that +region item is removed with rmdir(2), the ocfs2 mount must BUG or go +readonly. Not happy. + +configfs provides two additional API calls: configfs_depend_item() and +configfs_undepend_item(). A client driver can call +configfs_depend_item() on an existing item to tell configfs that it is +depended on. configfs will then return -EBUSY from rmdir(2) for that +item. When the item is no longer depended on, the client driver calls +configfs_undepend_item() on it. 
+
+These APIs cannot be called underneath any configfs callbacks, as
+they will conflict. They can block and allocate. A client driver
+probably shouldn't call them of its own gumption. Rather it should
+be providing an API that external subsystems call.
+
+How does this work? Imagine the ocfs2 mount process. When it mounts,
+it asks for a heartbeat region item. This is done via a call into the
+heartbeat code. Inside the heartbeat code, the region item is looked
+up. Here, the heartbeat code calls configfs_depend_item(). If it
+succeeds, then heartbeat knows the region is safe to give to ocfs2.
+If it fails, it was being torn down anyway, and heartbeat can gracefully
+pass up an error.
+
+Committable Items
+=================
+
+Note:
+   Committable items are currently unimplemented.
+
+Some config_items cannot have a valid initial state. That is, no
+default values can be specified for the item's attributes such that the
+item can do its work. Userspace must configure one or more attributes,
+after which the subsystem can start whatever entity this item
+represents.
+
+Consider the FakeNBD device from above. Without a target address *and*
+a target device, the subsystem has no idea what block device to import.
+The simple example assumes that the subsystem merely waits until all the
+appropriate attributes are configured, and then connects. This will,
+indeed, work, but now every attribute store must check if the attributes
+are initialized. Every attribute store must fire off the connection if
+that condition is met.
+
+Far better would be an explicit action notifying the subsystem that the
+config_item is ready to go. More importantly, an explicit action allows
+the subsystem to provide feedback as to whether the attributes are
+initialized in a way that makes sense. configfs provides this as
+committable items.
+
+configfs still uses only normal filesystem operations. An item is
+committed via rename(2). 
The item is moved from a directory where it +can be modified to a directory where it cannot. + +Any group that provides the ct_group_ops->commit_item() method has +committable items. When this group appears in configfs, mkdir(2) will +not work directly in the group. Instead, the group will have two +subdirectories: "live" and "pending". The "live" directory does not +support mkdir(2) or rmdir(2) either. It only allows rename(2). The +"pending" directory does allow mkdir(2) and rmdir(2). An item is +created in the "pending" directory. Its attributes can be modified at +will. Userspace commits the item by renaming it into the "live" +directory. At this point, the subsystem receives the ->commit_item() +callback. If all required attributes are filled to satisfaction, the +method returns zero and the item is moved to the "live" directory. + +As rmdir(2) does not work in the "live" directory, an item must be +shutdown, or "uncommitted". Again, this is done via rename(2), this +time from the "live" directory back to the "pending" one. The subsystem +is notified by the ct_group_ops->uncommit_object() method. diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt deleted file mode 100644 index 16e606c11f40..000000000000 --- a/Documentation/filesystems/configfs/configfs.txt +++ /dev/null @@ -1,508 +0,0 @@ - -configfs - Userspace-driven kernel object configuration. - -Joel Becker - -Updated: 31 March 2005 - -Copyright (c) 2005 Oracle Corporation, - Joel Becker - - -[What is configfs?] - -configfs is a ram-based filesystem that provides the converse of -sysfs's functionality. Where sysfs is a filesystem-based view of -kernel objects, configfs is a filesystem-based manager of kernel -objects, or config_items. - -With sysfs, an object is created in kernel (for example, when a device -is discovered) and it is registered with sysfs. 
Its attributes then -appear in sysfs, allowing userspace to read the attributes via -readdir(3)/read(2). It may allow some attributes to be modified via -write(2). The important point is that the object is created and -destroyed in kernel, the kernel controls the lifecycle of the sysfs -representation, and sysfs is merely a window on all this. - -A configfs config_item is created via an explicit userspace operation: -mkdir(2). It is destroyed via rmdir(2). The attributes appear at -mkdir(2) time, and can be read or modified via read(2) and write(2). -As with sysfs, readdir(3) queries the list of items and/or attributes. -symlink(2) can be used to group items together. Unlike sysfs, the -lifetime of the representation is completely driven by userspace. The -kernel modules backing the items must respond to this. - -Both sysfs and configfs can and should exist together on the same -system. One is not a replacement for the other. - -[Using configfs] - -configfs can be compiled as a module or into the kernel. You can access -it by doing - - mount -t configfs none /config - -The configfs tree will be empty unless client modules are also loaded. -These are modules that register their item types with configfs as -subsystems. Once a client subsystem is loaded, it will appear as a -subdirectory (or more than one) under /config. Like sysfs, the -configfs tree is always there, whether mounted on /config or not. - -An item is created via mkdir(2). The item's attributes will also -appear at this time. readdir(3) can determine what the attributes are, -read(2) can query their default values, and write(2) can store new -values. Don't mix more than one attribute in one attribute file. - -There are two types of configfs attributes: - -* Normal attributes, which similar to sysfs attributes, are small ASCII text -files, with a maximum size of one page (PAGE_SIZE, 4096 on i386). Preferably -only one value per file should be used, and the same caveats from sysfs apply. 
-Configfs expects write(2) to store the entire buffer at once. When writing to -normal configfs attributes, userspace processes should first read the entire -file, modify the portions they wish to change, and then write the entire -buffer back. - -* Binary attributes, which are somewhat similar to sysfs binary attributes, -but with a few slight changes to semantics. The PAGE_SIZE limitation does not -apply, but the whole binary item must fit in single kernel vmalloc'ed buffer. -The write(2) calls from user space are buffered, and the attributes' -write_bin_attribute method will be invoked on the final close, therefore it is -imperative for user-space to check the return code of close(2) in order to -verify that the operation finished successfully. -To avoid a malicious user OOMing the kernel, there's a per-binary attribute -maximum buffer value. - -When an item needs to be destroyed, remove it with rmdir(2). An -item cannot be destroyed if any other item has a link to it (via -symlink(2)). Links can be removed via unlink(2). - -[Configuring FakeNBD: an Example] - -Imagine there's a Network Block Device (NBD) driver that allows you to -access remote block devices. Call it FakeNBD. FakeNBD uses configfs -for its configuration. Obviously, there will be a nice program that -sysadmins use to configure FakeNBD, but somehow that program has to tell -the driver about it. Here's where configfs comes in. - -When the FakeNBD driver is loaded, it registers itself with configfs. -readdir(3) sees this just fine: - - # ls /config - fakenbd - -A fakenbd connection can be created with mkdir(2). The name is -arbitrary, but likely the tool will make some use of the name. Perhaps -it is a uuid or a disk name: - - # mkdir /config/fakenbd/disk1 - # ls /config/fakenbd/disk1 - target device rw - -The target attribute contains the IP address of the server FakeNBD will -connect to. The device attribute is the device on the server. 
-Predictably, the rw attribute determines whether the connection is -read-only or read-write. - - # echo 10.0.0.1 > /config/fakenbd/disk1/target - # echo /dev/sda1 > /config/fakenbd/disk1/device - # echo 1 > /config/fakenbd/disk1/rw - -That's it. That's all there is. Now the device is configured, via the -shell no less. - -[Coding With configfs] - -Every object in configfs is a config_item. A config_item reflects an -object in the subsystem. It has attributes that match values on that -object. configfs handles the filesystem representation of that object -and its attributes, allowing the subsystem to ignore all but the -basic show/store interaction. - -Items are created and destroyed inside a config_group. A group is a -collection of items that share the same attributes and operations. -Items are created by mkdir(2) and removed by rmdir(2), but configfs -handles that. The group has a set of operations to perform these tasks - -A subsystem is the top level of a client module. During initialization, -the client module registers the subsystem with configfs, the subsystem -appears as a directory at the top of the configfs filesystem. A -subsystem is also a config_group, and can do everything a config_group -can. - -[struct config_item] - - struct config_item { - char *ci_name; - char ci_namebuf[UOBJ_NAME_LEN]; - struct kref ci_kref; - struct list_head ci_entry; - struct config_item *ci_parent; - struct config_group *ci_group; - struct config_item_type *ci_type; - struct dentry *ci_dentry; - }; - - void config_item_init(struct config_item *); - void config_item_init_type_name(struct config_item *, - const char *name, - struct config_item_type *type); - struct config_item *config_item_get(struct config_item *); - void config_item_put(struct config_item *); - -Generally, struct config_item is embedded in a container structure, a -structure that actually represents what the subsystem is doing. 
The -config_item portion of that structure is how the object interacts with -configfs. - -Whether statically defined in a source file or created by a parent -config_group, a config_item must have one of the _init() functions -called on it. This initializes the reference count and sets up the -appropriate fields. - -All users of a config_item should have a reference on it via -config_item_get(), and drop the reference when they are done via -config_item_put(). - -By itself, a config_item cannot do much more than appear in configfs. -Usually a subsystem wants the item to display and/or store attributes, -among other things. For that, it needs a type. - -[struct config_item_type] - - struct configfs_item_operations { - void (*release)(struct config_item *); - int (*allow_link)(struct config_item *src, - struct config_item *target); - void (*drop_link)(struct config_item *src, - struct config_item *target); - }; - - struct config_item_type { - struct module *ct_owner; - struct configfs_item_operations *ct_item_ops; - struct configfs_group_operations *ct_group_ops; - struct configfs_attribute **ct_attrs; - struct configfs_bin_attribute **ct_bin_attrs; - }; - -The most basic function of a config_item_type is to define what -operations can be performed on a config_item. All items that have been -allocated dynamically will need to provide the ct_item_ops->release() -method. This method is called when the config_item's reference count -reaches zero. - -[struct configfs_attribute] - - struct configfs_attribute { - char *ca_name; - struct module *ca_owner; - umode_t ca_mode; - ssize_t (*show)(struct config_item *, char *); - ssize_t (*store)(struct config_item *, const char *, size_t); - }; - -When a config_item wants an attribute to appear as a file in the item's -configfs directory, it must define a configfs_attribute describing it. -It then adds the attribute to the NULL-terminated array -config_item_type->ct_attrs. 
When the item appears in configfs, the -attribute file will appear with the configfs_attribute->ca_name -filename. configfs_attribute->ca_mode specifies the file permissions. - -If an attribute is readable and provides a ->show method, that method will -be called whenever userspace asks for a read(2) on the attribute. If an -attribute is writable and provides a ->store method, that method will -be called whenever userspace asks for a write(2) on the attribute. - -[struct configfs_bin_attribute] - - struct configfs_bin_attribute { - struct configfs_attribute cb_attr; - void *cb_private; - size_t cb_max_size; - }; - -The binary attribute is used when one needs a binary blob to -appear as the contents of a file in the item's configfs directory. -To do so add the binary attribute to the NULL-terminated array -config_item_type->ct_bin_attrs, and when the item appears in configfs, the -attribute file will appear with the configfs_bin_attribute->cb_attr.ca_name -filename. configfs_bin_attribute->cb_attr.ca_mode specifies the file -permissions. -The cb_private member is provided for use by the driver, while the -cb_max_size member specifies the maximum amount of vmalloc buffer -to be used. - -If a binary attribute is readable and the config_item provides a -ct_item_ops->read_bin_attribute() method, that method will be called -whenever userspace asks for a read(2) on the attribute. The converse -will happen for write(2). The reads/writes are buffered so only a -single read/write will occur; the attribute need not concern itself -with it. - -[struct config_group] - -A config_item cannot live in a vacuum. The only way one can be created -is via mkdir(2) on a config_group. This will trigger creation of a -child item. 
- - struct config_group { - struct config_item cg_item; - struct list_head cg_children; - struct configfs_subsystem *cg_subsys; - struct list_head default_groups; - struct list_head group_entry; - }; - - void config_group_init(struct config_group *group); - void config_group_init_type_name(struct config_group *group, - const char *name, - struct config_item_type *type); - - -The config_group structure contains a config_item. Properly configuring -that item means that a group can behave as an item in its own right. -However, it can do more: it can create child items or groups. This is -accomplished via the group operations specified on the group's -config_item_type. - - struct configfs_group_operations { - struct config_item *(*make_item)(struct config_group *group, - const char *name); - struct config_group *(*make_group)(struct config_group *group, - const char *name); - int (*commit_item)(struct config_item *item); - void (*disconnect_notify)(struct config_group *group, - struct config_item *item); - void (*drop_item)(struct config_group *group, - struct config_item *item); - }; - -A group creates child items by providing the -ct_group_ops->make_item() method. If provided, this method is called from mkdir(2) in the group's directory. The subsystem allocates a new -config_item (or more likely, its container structure), initializes it, -and returns it to configfs. Configfs will then populate the filesystem -tree to reflect the new item. - -If the subsystem wants the child to be a group itself, the subsystem -provides ct_group_ops->make_group(). Everything else behaves the same, -using the group _init() functions on the group. - -Finally, when userspace calls rmdir(2) on the item or group, -ct_group_ops->drop_item() is called. As a config_group is also a -config_item, it is not necessary for a separate drop_group() method. -The subsystem must config_item_put() the reference that was initialized -upon item allocation. 
If a subsystem has no work to do, it may omit -the ct_group_ops->drop_item() method, and configfs will call -config_item_put() on the item on behalf of the subsystem. - -IMPORTANT: drop_item() is void, and as such cannot fail. When rmdir(2) -is called, configfs WILL remove the item from the filesystem tree -(assuming that it has no children to keep it busy). The subsystem is -responsible for responding to this. If the subsystem has references to -the item in other threads, the memory is safe. It may take some time -for the item to actually disappear from the subsystem's usage. But it -is gone from configfs. - -When drop_item() is called, the item's linkage has already been torn -down. It no longer has a reference on its parent and has no place in -the item hierarchy. If a client needs to do some cleanup before this -teardown happens, the subsystem can implement the -ct_group_ops->disconnect_notify() method. The method is called after -configfs has removed the item from the filesystem view but before the -item is removed from its parent group. Like drop_item(), -disconnect_notify() is void and cannot fail. Client subsystems should -not drop any references here, as they still must do it in drop_item(). - -A config_group cannot be removed while it still has child items. This -is implemented in the configfs rmdir(2) code. ->drop_item() will not be -called, as the item has not been dropped. rmdir(2) will fail, as the -directory is not empty. - -[struct configfs_subsystem] - -A subsystem must register itself, usually at module_init time. This -tells configfs to make the subsystem appear in the file tree. - - struct configfs_subsystem { - struct config_group su_group; - struct mutex su_mutex; - }; - - int configfs_register_subsystem(struct configfs_subsystem *subsys); - void configfs_unregister_subsystem(struct configfs_subsystem *subsys); - - A subsystem consists of a toplevel config_group and a mutex. -The group is where child config_items are created. 
For a subsystem, -this group is usually defined statically. Before calling -configfs_register_subsystem(), the subsystem must have initialized the -group via the usual group _init() functions, and it must also have -initialized the mutex. - When the register call returns, the subsystem is live, and it -will be visible via configfs. At that point, mkdir(2) can be called and -the subsystem must be ready for it. - -[An Example] - -The best example of these basic concepts is the simple_children -subsystem/group and the simple_child item in -samples/configfs/configfs_sample.c. It shows a trivial object displaying -and storing an attribute, and a simple group creating and destroying -these children. - -[Hierarchy Navigation and the Subsystem Mutex] - -There is an extra bonus that configfs provides. The config_groups and -config_items are arranged in a hierarchy due to the fact that they -appear in a filesystem. A subsystem is NEVER to touch the filesystem -parts, but the subsystem might be interested in this hierarchy. For -this reason, the hierarchy is mirrored via the config_group->cg_children -and config_item->ci_parent structure members. - -A subsystem can navigate the cg_children list and the ci_parent pointer -to see the tree created by the subsystem. This can race with configfs' -management of the hierarchy, so configfs uses the subsystem mutex to -protect modifications. Whenever a subsystem wants to navigate the -hierarchy, it must do so under the protection of the subsystem -mutex. - -A subsystem will be prevented from acquiring the mutex while a newly -allocated item has not been linked into this hierarchy. Similarly, it -will not be able to acquire the mutex while a dropping item has not -yet been unlinked. This means that an item's ci_parent pointer will -never be NULL while the item is in configfs, and that an item will only -be in its parent's cg_children list for the same duration. 
This allows -a subsystem to trust ci_parent and cg_children while they hold the -mutex. - -[Item Aggregation Via symlink(2)] - -configfs provides a simple group via the group->item parent/child -relationship. Often, however, a larger environment requires aggregation -outside of the parent/child connection. This is implemented via -symlink(2). - -A config_item may provide the ct_item_ops->allow_link() and -ct_item_ops->drop_link() methods. If the ->allow_link() method exists, -symlink(2) may be called with the config_item as the source of the link. -These links are only allowed between configfs config_items. Any -symlink(2) attempt outside the configfs filesystem will be denied. - -When symlink(2) is called, the source config_item's ->allow_link() -method is called with itself and a target item. If the source item -allows linking to target item, it returns 0. A source item may wish to -reject a link if it only wants links to a certain type of object (say, -in its own subsystem). - -When unlink(2) is called on the symbolic link, the source item is -notified via the ->drop_link() method. Like the ->drop_item() method, -this is a void function and cannot return failure. The subsystem is -responsible for responding to the change. - -A config_item cannot be removed while it links to any other item, nor -can it be removed while an item links to it. Dangling symlinks are not -allowed in configfs. - -[Automatically Created Subgroups] - -A new config_group may want to have two types of child config_items. -While this could be codified by magic names in ->make_item(), it is much -more explicit to have a method whereby userspace sees this divergence. - -Rather than have a group where some items behave differently than -others, configfs provides a method whereby one or many subgroups are -automatically created inside the parent at its creation. Thus, -mkdir("parent") results in "parent", "parent/subgroup1", up through -"parent/subgroupN". 
Items of type 1 can now be created in -"parent/subgroup1", and items of type N can be created in -"parent/subgroupN". - -These automatic subgroups, or default groups, do not preclude other -children of the parent group. If ct_group_ops->make_group() exists, -other child groups can be created on the parent group directly. - -A configfs subsystem specifies default groups by adding them using the -configfs_add_default_group() function to the parent config_group -structure. Each added group is populated in the configfs tree at the same -time as the parent group. Similarly, they are removed at the same time -as the parent. No extra notification is provided. When a ->drop_item() -method call notifies the subsystem the parent group is going away, it -also means every default group child associated with that parent group. - -As a consequence of this, default groups cannot be removed directly via -rmdir(2). They also are not considered when rmdir(2) on the parent -group is checking for children. - -[Dependent Subsystems] - -Sometimes other drivers depend on particular configfs items. For -example, ocfs2 mounts depend on a heartbeat region item. If that -region item is removed with rmdir(2), the ocfs2 mount must BUG or go -readonly. Not happy. - -configfs provides two additional API calls: configfs_depend_item() and -configfs_undepend_item(). A client driver can call -configfs_depend_item() on an existing item to tell configfs that it is -depended on. configfs will then return -EBUSY from rmdir(2) for that -item. When the item is no longer depended on, the client driver calls -configfs_undepend_item() on it. - -These APIs cannot be called underneath any configfs callbacks, as -they will conflict. They can block and allocate. A client driver -probably shouldn't call them of its own gumption. Rather it should -be providing an API that external subsystems call. - -How does this work? Imagine the ocfs2 mount process. When it mounts, -it asks for a heartbeat region item. 
This is done via a call into the -heartbeat code. Inside the heartbeat code, the region item is looked -up. Here, the heartbeat code calls configfs_depend_item(). If it -succeeds, then heartbeat knows the region is safe to give to ocfs2. -If it fails, it was being torn down anyway, and heartbeat can gracefully -pass up an error. - -[Committable Items] - -NOTE: Committable items are currently unimplemented. - -Some config_items cannot have a valid initial state. That is, no -default values can be specified for the item's attributes such that the -item can do its work. Userspace must configure one or more attributes, -after which the subsystem can start whatever entity this item -represents. - -Consider the FakeNBD device from above. Without a target address *and* -a target device, the subsystem has no idea what block device to import. -The simple example assumes that the subsystem merely waits until all the -appropriate attributes are configured, and then connects. This will, -indeed, work, but now every attribute store must check if the attributes -are initialized. Every attribute store must fire off the connection if -that condition is met. - -Far better would be an explicit action notifying the subsystem that the -config_item is ready to go. More importantly, an explicit action allows -the subsystem to provide feedback as to whether the attributes are -initialized in a way that makes sense. configfs provides this as -committable items. - -configfs still uses only normal filesystem operations. An item is -committed via rename(2). The item is moved from a directory where it -can be modified to a directory where it cannot. - -Any group that provides the ct_group_ops->commit_item() method has -committable items. When this group appears in configfs, mkdir(2) will -not work directly in the group. Instead, the group will have two -subdirectories: "live" and "pending". The "live" directory does not -support mkdir(2) or rmdir(2) either. It only allows rename(2). 
The -"pending" directory does allow mkdir(2) and rmdir(2). An item is -created in the "pending" directory. Its attributes can be modified at -will. Userspace commits the item by renaming it into the "live" -directory. At this point, the subsystem receives the ->commit_item() -callback. If all required attributes are filled to satisfaction, the -method returns zero and the item is moved to the "live" directory. - -As rmdir(2) does not work in the "live" directory, an item must be -shutdown, or "uncommitted". Again, this is done via rename(2), this -time from the "live" directory back to the "pending" one. The subsystem -is notified by the ct_group_ops->uncommit_object() method. - - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index e242cb75f9f2..17795341e0a3 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -76,6 +76,7 @@ Documentation for filesystem implementations. cifs/cifsroot ceph coda + configfs cramfs debugfs dlmfs diff --git a/Documentation/iio/iio_configfs.rst b/Documentation/iio/iio_configfs.rst index ecbfdb3afef7..6e38cbbd2981 100644 --- a/Documentation/iio/iio_configfs.rst +++ b/Documentation/iio/iio_configfs.rst @@ -9,7 +9,7 @@ Configfs is a filesystem-based manager of kernel objects. IIO uses some objects that could be easily configured using configfs (e.g.: devices, triggers). -See Documentation/filesystems/configfs/configfs.txt for more information +See Documentation/filesystems/configfs.rst for more information about how configfs works. 2. Usage diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst index 54fb08baae22..158e48dab586 100644 --- a/Documentation/usb/gadget_configfs.rst +++ b/Documentation/usb/gadget_configfs.rst @@ -24,7 +24,7 @@ Linux provides a number of functions for gadgets to use. Creating a gadget means deciding what configurations there will be and which functions each configuration will provide. 
-Configfs (please see `Documentation/filesystems/configfs/*`) lends itself nicely +Configfs (please see `Documentation/filesystems/configfs.rst`) lends itself nicely for the purpose of telling the kernel about the above mentioned decision. This document is about how to do it. @@ -354,7 +354,7 @@ the directories in general can be named at will. A group can have a number of its default sub-groups created automatically. For more information on configfs please see -`Documentation/filesystems/configfs/*`. +`Documentation/filesystems/configfs.rst`. The concepts described above translate to USB gadgets like this: diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index fd0b5dd68f9e..8bd6a883c94c 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -9,7 +9,7 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please see Documentation/filesystems/configfs/configfs.txt for more + * Please see Documentation/filesystems/configfs.rst for more * information. */ diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 6e0f1fcb8a5b..704a4356f137 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -9,7 +9,7 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please see the file Documentation/filesystems/configfs/configfs.txt for + * Please see the file Documentation/filesystems/configfs.rst for * critical information about using the config_item interface. */ diff --git a/include/linux/configfs.h b/include/linux/configfs.h index fa9490a8874c..2e8c69b43c64 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -13,7 +13,7 @@ * * configfs Copyright (C) 2005 Oracle. All rights reserved. * - * Please read Documentation/filesystems/configfs/configfs.txt before using + * Please read Documentation/filesystems/configfs.rst before using * the configfs interface, ESPECIALLY the parts about reference counts and * item destructors. 
*/ -- cgit v1.2.3 From 3eaa3bfa380bf07f778bc2ff57441976803ccaed Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Tue, 5 May 2020 14:18:28 +0800 Subject: kobject: documentation: Fix erroneous function example in kobject doc. Update the definitions of some functions listed in the kobject document, since they have been changed. Signed-off-by: Qi Zheng Link: https://lore.kernel.org/r/20200505061828.42952-1-arch0.zheng@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/kobject.rst | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst index 1f62d4d7d966..c3ac2963283b 100644 --- a/Documentation/core-api/kobject.rst +++ b/Documentation/core-api/kobject.rst @@ -80,11 +80,11 @@ what is the pointer to the containing structure? You must avoid tricks (such as assuming that the kobject is at the beginning of the structure) and, instead, use the container_of() macro, found in ````:: - container_of(pointer, type, member) + container_of(ptr, type, member) where: - * ``pointer`` is the pointer to the embedded kobject, + * ``ptr`` is the pointer to the embedded kobject, * ``type`` is the type of the containing structure, and * ``member`` is the name of the structure field to which ``pointer`` points. @@ -140,7 +140,7 @@ the name of the kobject, call kobject_rename():: int kobject_rename(struct kobject *kobj, const char *new_name); -kobject_rename does not perform any locking or have a solid notion of +kobject_rename() does not perform any locking or have a solid notion of what names are valid so the caller must provide their own sanity checking and serialization. @@ -222,17 +222,17 @@ ksets, show and store functions, and other details. This is the one exception where a single kobject should be created. 
To create such an entry, use the function:: - struct kobject *kobject_create_and_add(char *name, struct kobject *parent); + struct kobject *kobject_create_and_add(const char *name, struct kobject *parent); This function will create a kobject and place it in sysfs in the location underneath the specified parent kobject. To create simple attributes associated with this kobject, use:: - int sysfs_create_file(struct kobject *kobj, struct attribute *attr); + int sysfs_create_file(struct kobject *kobj, const struct attribute *attr); or:: - int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp); + int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); Both types of attributes used here, with a kobject that has been created with the kobject_create_and_add(), can be of type kobj_attribute, so no @@ -300,8 +300,10 @@ kobj_type:: void (*release)(struct kobject *kobj); const struct sysfs_ops *sysfs_ops; struct attribute **default_attrs; + const struct attribute_group **default_groups; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); + void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; This structure is used to describe a particular type of kobject (or, more @@ -352,12 +354,12 @@ created and never declared statically or on the stack. To create a new kset use:: struct kset *kset_create_and_add(const char *name, - struct kset_uevent_ops *u, - struct kobject *parent); + const struct kset_uevent_ops *uevent_ops, + struct kobject *parent_kobj); When you are finished with the kset, call:: - void kset_unregister(struct kset *kset); + void kset_unregister(struct kset *k); to destroy it. This removes the kset from sysfs and decrements its reference count. When the reference count goes to zero, the kset will be released. 
@@ -371,9 +373,9 @@ If a kset wishes to control the uevent operations of the kobjects associated with it, it can use the struct kset_uevent_ops to handle it:: struct kset_uevent_ops { - int (*filter)(struct kset *kset, struct kobject *kobj); - const char *(*name)(struct kset *kset, struct kobject *kobj); - int (*uevent)(struct kset *kset, struct kobject *kobj, + int (* const filter)(struct kset *kset, struct kobject *kobj); + const char *(* const name)(struct kset *kset, struct kobject *kobj); + int (* const uevent)(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env); }; -- cgit v1.2.3 From 094d6dc56245d0c796af9ed3e680effbe66ba0b5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 2 May 2020 16:30:58 +0200 Subject: watchdog: update email address in conversion doc The old one is defunct. However, I think it makes sense that I am still the primary contact person for updates here. Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20200502143103.19473-1-wsa@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/watchdog/convert_drivers_to_kernel_api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.rst b/Documentation/watchdog/convert_drivers_to_kernel_api.rst index 51b999b5551a..a1c3f038ce0e 100644 --- a/Documentation/watchdog/convert_drivers_to_kernel_api.rst +++ b/Documentation/watchdog/convert_drivers_to_kernel_api.rst @@ -2,7 +2,7 @@ Converting old watchdog drivers to the watchdog framework ========================================================= -by Wolfram Sang +by Wolfram Sang Before the watchdog framework came into the kernel, every driver had to implement the API on its own. 
Now, as the framework factored out the common -- cgit v1.2.3 From 35c59990058351a1c9efab9e49dd19c2e5579cfb Mon Sep 17 00:00:00 2001 From: Joshua Abraham Date: Fri, 1 May 2020 18:36:24 -0400 Subject: docs: kvm: Fix KVM_KVMCLOCK_CTRL API doc The KVM_KVMCLOCK_CTRL ioctl signals to supported KVM guests that the hypervisor has paused it. Update the documentation to reflect that the guest is notified by this API. Signed-off-by: Joshua Abraham Link: https://lore.kernel.org/r/20200501223624.GA25826@josh-ZenBook Signed-off-by: Jonathan Corbet --- Documentation/virt/kvm/api.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index efbbe570aa9b..d2c1cbce1018 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2572,13 +2572,15 @@ list in 4.68. :Parameters: None :Returns: 0 on success, -1 on error -This signals to the host kernel that the specified guest is being paused by -userspace. The host will set a flag in the pvclock structure that is checked -from the soft lockup watchdog. The flag is part of the pvclock structure that -is shared between guest and host, specifically the second bit of the flags +This ioctl sets a flag accessible to the guest indicating that the specified +vCPU has been paused by the host userspace. + +The host will set a flag in the pvclock structure that is checked from the +soft lockup watchdog. The flag is part of the pvclock structure that is +shared between guest and host, specifically the second bit of the flags field of the pvclock_vcpu_time_info structure. It will be set exclusively by the host and read/cleared exclusively by the guest. The guest operation of -checking and clearing the flag must an atomic operation so +checking and clearing the flag must be an atomic operation so load-link/store-conditional, or equivalent must be used. 
There are two cases where the guest will clear the flag: when the soft lockup watchdog timer resets itself or when a soft lockup is detected. This ioctl can be called any time -- cgit v1.2.3 From 16a398d17649d781773fa8e342a4ce28cd65af97 Mon Sep 17 00:00:00 2001 From: Vitor Massaru Iha Date: Thu, 30 Apr 2020 19:58:28 -0300 Subject: doc: misc-device: add uacce to toctree(index) This fixes: Documentation/misc-devices/uacce.rst: WARNING: document isn't included in any toctree Signed-off-by: Vitor Massaru Iha Link: https://lore.kernel.org/r/20200430225828.114033-1-vitor@massaru.org Signed-off-by: Jonathan Corbet --- Documentation/misc-devices/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/misc-devices/index.rst b/Documentation/misc-devices/index.rst index c1dcd2628911..1ecc05fbe6f4 100644 --- a/Documentation/misc-devices/index.rst +++ b/Documentation/misc-devices/index.rst @@ -21,4 +21,5 @@ fit into other categories. lis3lv02d max6875 mic/index + uacce xilinx_sdfec -- cgit v1.2.3 From b67aa4ef68ed4f2933d06eb65a5721c97cce9934 Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Fri, 1 May 2020 00:20:37 +0200 Subject: doc:it_IT: align Italian translation Translation for the following patches: commit c4f4af4094d6 ("docs: Add documentation for Symbol Namespaces") commit 36bc683dde0a ("kernel-doc: rename the kernel-doc directive 'functions' to 'identifiers'") commit a035d552a93b ("Makefile: Globally enable fall-through warning") commit b9918bdcac1f ("Documentation/process: Add fallthrough pseudo-keyword") commit 58ad30cf91f0 ("docs: fix reference to core-api/namespaces.rst") commit fb0e0ffe7fc8 ("Documentation: bring process docs up to date") commit 7af51678b6d3 ("docs: deprecated.rst: Add BUG()-family") commit 7929b9836ed0 ("docs: Remove :c:func: from process/deprecated.rst") commit 76136e028d3b ("docs: deprecated.rst: Clean up fall-through details") commit d8401f504b49 ("docs: deprecated.rst: Add %p to the list") commit b1735296cef9 ("docs: 
locking: Drop :c:func: throughout") commit 6adb7755996f ("docs: locking: Add 'need' to hardirq section") Signed-off-by: Federico Vaga Link: https://lore.kernel.org/r/20200430222037.4480-1-federico.vaga@vaga.pv.it Signed-off-by: Jonathan Corbet --- .../translations/it_IT/doc-guide/kernel-doc.rst | 25 +-- .../translations/it_IT/kernel-hacking/hacking.rst | 18 +++ .../translations/it_IT/kernel-hacking/locking.rst | 172 ++++++++++----------- .../translations/it_IT/process/2.Process.rst | 95 ++++++------ .../translations/it_IT/process/coding-style.rst | 6 +- .../translations/it_IT/process/deprecated.rst | 130 ++++++++++++++-- 6 files changed, 287 insertions(+), 159 deletions(-) diff --git a/Documentation/translations/it_IT/doc-guide/kernel-doc.rst b/Documentation/translations/it_IT/doc-guide/kernel-doc.rst index a4ecd8f27631..524ad86cadbb 100644 --- a/Documentation/translations/it_IT/doc-guide/kernel-doc.rst +++ b/Documentation/translations/it_IT/doc-guide/kernel-doc.rst @@ -515,6 +515,22 @@ internal: *[source-pattern ...]* .. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c :internal: +identifiers: *[ function/type ...]* + Include la documentazione per ogni *function* e *type* in *source*. + Se non vengono esplicitamente specificate le funzioni da includere, allora + verranno incluse tutte quelle disponibili in *source*. + + Esempi:: + + .. kernel-doc:: lib/bitmap.c + :identifiers: bitmap_parselist bitmap_parselist_user + + .. kernel-doc:: lib/idr.c + :identifiers: + +functions: *[ function ...]* + Questo è uno pseudonimo, deprecato, per la direttiva 'identifiers'. + doc: *title* Include la documentazione del paragrafo ``DOC:`` identificato dal titolo (*title*) all'interno del file sorgente (*source*). Gli spazi in *title* sono @@ -528,15 +544,6 @@ doc: *title* .. 
kernel-doc:: drivers/gpu/drm/i915/intel_audio.c :doc: High Definition Audio over HDMI and Display Port -functions: *function* *[...]* - Dal file sorgente (*source*) include la documentazione per le funzioni - elencate (*function*). - - Esempio:: - - .. kernel-doc:: lib/bitmap.c - :functions: bitmap_parselist bitmap_parselist_user - Senza alcuna opzione, la direttiva kernel-doc include tutti i commenti di documentazione presenti nel file sorgente (*source*). diff --git a/Documentation/translations/it_IT/kernel-hacking/hacking.rst b/Documentation/translations/it_IT/kernel-hacking/hacking.rst index 24c592852bf1..6aab27a8d323 100644 --- a/Documentation/translations/it_IT/kernel-hacking/hacking.rst +++ b/Documentation/translations/it_IT/kernel-hacking/hacking.rst @@ -627,6 +627,24 @@ Alcuni manutentori e sviluppatori potrebbero comunque richiedere :c:func:`EXPORT_SYMBOL_GPL()` quando si aggiungono nuove funzionalità o interfacce. +:c:func:`EXPORT_SYMBOL_NS()` +---------------------------- + +Definita in ``include/linux/export.h`` + +Questa è una variate di `EXPORT_SYMBOL()` che permette di specificare uno +spazio dei nomi. Lo spazio dei nomi è documentato in +:doc:`../core-api/symbol-namespaces` + +:c:func:`EXPORT_SYMBOL_NS_GPL()` +-------------------------------- + +Definita in ``include/linux/export.h`` + +Questa è una variate di `EXPORT_SYMBOL_GPL()` che permette di specificare uno +spazio dei nomi. 
Lo spazio dei nomi è documentato in +:doc:`../core-api/symbol-namespaces` + Procedure e convenzioni ======================= diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst index b9a6be4b8499..4615df5723fb 100644 --- a/Documentation/translations/it_IT/kernel-hacking/locking.rst +++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst @@ -159,17 +159,17 @@ Sincronizzazione in contesto utente Se avete una struttura dati che verrà utilizzata solo dal contesto utente, allora, per proteggerla, potete utilizzare un semplice mutex (``include/linux/mutex.h``). Questo è il caso più semplice: inizializzate il -mutex; invocate :c:func:`mutex_lock_interruptible()` per trattenerlo e -:c:func:`mutex_unlock()` per rilasciarlo. C'è anche :c:func:`mutex_lock()` +mutex; invocate mutex_lock_interruptible() per trattenerlo e +mutex_unlock() per rilasciarlo. C'è anche mutex_lock() ma questa dovrebbe essere evitata perché non ritorna in caso di segnali. Per esempio: ``net/netfilter/nf_sockopt.c`` permette la registrazione -di nuove chiamate per :c:func:`setsockopt()` e :c:func:`getsockopt()` -usando la funzione :c:func:`nf_register_sockopt()`. La registrazione e +di nuove chiamate per setsockopt() e getsockopt() +usando la funzione nf_register_sockopt(). La registrazione e la rimozione vengono eseguite solamente quando il modulo viene caricato o scaricato (e durante l'avvio del sistema, qui non abbiamo concorrenza), e la lista delle funzioni registrate viene consultata solamente quando -:c:func:`setsockopt()` o :c:func:`getsockopt()` sono sconosciute al sistema. +setsockopt() o getsockopt() sono sconosciute al sistema. In questo caso ``nf_sockopt_mutex`` è perfetto allo scopo, in particolar modo visto che setsockopt e getsockopt potrebbero dormire. @@ -179,19 +179,19 @@ Sincronizzazione fra il contesto utente e i softirq Se un softirq condivide dati col contesto utente, avete due problemi. 
Primo, il contesto utente corrente potrebbe essere interroto da un softirq, e secondo, la sezione critica potrebbe essere eseguita da un altro -processore. Questo è quando :c:func:`spin_lock_bh()` +processore. Questo è quando spin_lock_bh() (``include/linux/spinlock.h``) viene utilizzato. Questo disabilita i softirq -sul processore e trattiene il *lock*. Invece, :c:func:`spin_unlock_bh()` fa +sul processore e trattiene il *lock*. Invece, spin_unlock_bh() fa l'opposto. (Il suffisso '_bh' è un residuo storico che fa riferimento al "Bottom Halves", il vecchio nome delle interruzioni software. In un mondo perfetto questa funzione si chiamerebbe 'spin_lock_softirq()'). -Da notare che in questo caso potete utilizzare anche :c:func:`spin_lock_irq()` -o :c:func:`spin_lock_irqsave()`, queste fermano anche le interruzioni hardware: +Da notare che in questo caso potete utilizzare anche spin_lock_irq() +o spin_lock_irqsave(), queste fermano anche le interruzioni hardware: vedere :ref:`Contesto di interruzione hardware `. Questo funziona alla perfezione anche sui sistemi monoprocessore: gli spinlock -svaniscono e questa macro diventa semplicemente :c:func:`local_bh_disable()` +svaniscono e questa macro diventa semplicemente local_bh_disable() (``include/linux/interrupt.h``), la quale impedisce ai softirq d'essere eseguiti. @@ -224,8 +224,8 @@ Differenti tasklet/timer ~~~~~~~~~~~~~~~~~~~~~~~~ Se un altro tasklet/timer vuole condividere dati col vostro tasklet o timer, -allora avrete bisogno entrambe di :c:func:`spin_lock()` e -:c:func:`spin_unlock()`. Qui :c:func:`spin_lock_bh()` è inutile, siete già +allora avrete bisogno entrambe di spin_lock() e +spin_unlock(). Qui spin_lock_bh() è inutile, siete già in un tasklet ed avete la garanzia che nessun altro verrà eseguito sullo stesso processore. @@ -243,13 +243,13 @@ processore (vedere :ref:`Dati per processore `). 
Se siete arrivati fino a questo punto nell'uso dei softirq, probabilmente tenete alla scalabilità delle prestazioni abbastanza da giustificarne la complessità aggiuntiva. -Dovete utilizzare :c:func:`spin_lock()` e :c:func:`spin_unlock()` per +Dovete utilizzare spin_lock() e spin_unlock() per proteggere i dati condivisi. Diversi Softirqs ~~~~~~~~~~~~~~~~ -Dovete utilizzare :c:func:`spin_lock()` e :c:func:`spin_unlock()` per +Dovete utilizzare spin_lock() e spin_unlock() per proteggere i dati condivisi, che siano timer, tasklet, diversi softirq o lo stesso o altri softirq: uno qualsiasi di essi potrebbe essere in esecuzione su un diverso processore. @@ -270,40 +270,40 @@ Se un gestore di interruzioni hardware condivide dati con un softirq, allora avrete due preoccupazioni. Primo, il softirq può essere interrotto da un'interruzione hardware, e secondo, la sezione critica potrebbe essere eseguita da un'interruzione hardware su un processore diverso. Questo è il caso -dove :c:func:`spin_lock_irq()` viene utilizzato. Disabilita le interruzioni -sul processore che l'esegue, poi trattiene il lock. :c:func:`spin_unlock_irq()` +dove spin_lock_irq() viene utilizzato. Disabilita le interruzioni +sul processore che l'esegue, poi trattiene il lock. spin_unlock_irq() fa l'opposto. -Il gestore d'interruzione hardware non usa :c:func:`spin_lock_irq()` perché -i softirq non possono essere eseguiti quando il gestore d'interruzione hardware -è in esecuzione: per questo si può usare :c:func:`spin_lock()`, che è un po' +Il gestore d'interruzione hardware non ha bisogno di usare spin_lock_irq() +perché i softirq non possono essere eseguiti quando il gestore d'interruzione +hardware è in esecuzione: per questo si può usare spin_lock(), che è un po' più veloce. 
L'unica eccezione è quando un altro gestore d'interruzioni -hardware utilizza lo stesso *lock*: :c:func:`spin_lock_irq()` impedirà a questo +hardware utilizza lo stesso *lock*: spin_lock_irq() impedirà a questo secondo gestore di interrompere quello in esecuzione. Questo funziona alla perfezione anche sui sistemi monoprocessore: gli spinlock -svaniscono e questa macro diventa semplicemente :c:func:`local_irq_disable()` +svaniscono e questa macro diventa semplicemente local_irq_disable() (``include/asm/smp.h``), la quale impedisce a softirq/tasklet/BH d'essere eseguiti. -:c:func:`spin_lock_irqsave()` (``include/linux/spinlock.h``) è una variante che +spin_lock_irqsave() (``include/linux/spinlock.h``) è una variante che salva lo stato delle interruzioni in una variabile, questa verrà poi passata -a :c:func:`spin_unlock_irqrestore()`. Questo significa che lo stesso codice +a spin_unlock_irqrestore(). Questo significa che lo stesso codice potrà essere utilizzato in un'interruzione hardware (dove le interruzioni sono già disabilitate) e in un softirq (dove la disabilitazione delle interruzioni è richiesta). Da notare che i softirq (e quindi tasklet e timer) sono eseguiti al ritorno -da un'interruzione hardware, quindi :c:func:`spin_lock_irq()` interrompe +da un'interruzione hardware, quindi spin_lock_irq() interrompe anche questi. Tenuto conto di questo si può dire che -:c:func:`spin_lock_irqsave()` è la funzione di sincronizzazione più generica +spin_lock_irqsave() è la funzione di sincronizzazione più generica e potente. Sincronizzazione fra due gestori d'interruzioni hardware -------------------------------------------------------- Condividere dati fra due gestori di interruzione hardware è molto raro, ma se -succede, dovreste usare :c:func:`spin_lock_irqsave()`: è una specificità +succede, dovreste usare spin_lock_irqsave(): è una specificità dell'architettura il fatto che tutte le interruzioni vengano interrotte quando si eseguono di gestori di interruzioni. 
@@ -317,11 +317,11 @@ Pete Zaitcev ci offre il seguente riassunto: il mutex e dormire (``copy_from_user*(`` o ``kmalloc(x,GFP_KERNEL)``). - Altrimenti (== i dati possono essere manipolati da un'interruzione) usate - :c:func:`spin_lock_irqsave()` e :c:func:`spin_unlock_irqrestore()`. + spin_lock_irqsave() e spin_unlock_irqrestore(). - Evitate di trattenere uno spinlock per più di 5 righe di codice incluse le chiamate a funzione (ad eccezione di quell per l'accesso come - :c:func:`readb()`). + readb()). Tabella dei requisiti minimi ---------------------------- @@ -334,7 +334,7 @@ processore alla volta, ma se deve condividere dati con un altro thread, allora la sincronizzazione è necessaria). Ricordatevi il suggerimento qui sopra: potete sempre usare -:c:func:`spin_lock_irqsave()`, che è un sovrainsieme di tutte le altre funzioni +spin_lock_irqsave(), che è un sovrainsieme di tutte le altre funzioni per spinlock. ============== ============= ============= ========= ========= ========= ========= ======= ======= ============== ============== @@ -378,13 +378,13 @@ protetti dal *lock* quando qualche altro thread lo sta già facendo trattenendo il *lock*. Potrete acquisire il *lock* più tardi se vi serve accedere ai dati protetti da questo *lock*. -La funzione :c:func:`spin_trylock()` non ritenta di acquisire il *lock*, +La funzione spin_trylock() non ritenta di acquisire il *lock*, se ci riesce al primo colpo ritorna un valore diverso da zero, altrimenti se fallisce ritorna 0. Questa funzione può essere utilizzata in un qualunque -contesto, ma come :c:func:`spin_lock()`: dovete disabilitare i contesti che +contesto, ma come spin_lock(): dovete disabilitare i contesti che potrebbero interrompervi e quindi trattenere lo spinlock. 
-La funzione :c:func:`mutex_trylock()` invece di sospendere il vostro processo +La funzione mutex_trylock() invece di sospendere il vostro processo ritorna un valore diverso da zero se è possibile trattenere il lock al primo colpo, altrimenti se fallisce ritorna 0. Nonostante non dorma, questa funzione non può essere usata in modo sicuro in contesti di interruzione hardware o @@ -506,7 +506,7 @@ della memoria che il suo contenuto sono protetti dal *lock*. Questo caso è semplice dato che copiamo i dati dall'utente e non permettiamo mai loro di accedere direttamente agli oggetti. -C'è una piccola ottimizzazione qui: nella funzione :c:func:`cache_add()` +C'è una piccola ottimizzazione qui: nella funzione cache_add() impostiamo i campi dell'oggetto prima di acquisire il *lock*. Questo è sicuro perché nessun altro potrà accedervi finché non lo inseriremo nella memoria. @@ -514,7 +514,7 @@ nella memoria. Accesso dal contesto utente --------------------------- -Ora consideriamo il caso in cui :c:func:`cache_find()` può essere invocata +Ora consideriamo il caso in cui cache_find() può essere invocata dal contesto d'interruzione: sia hardware che software. Un esempio potrebbe essere un timer che elimina oggetti dalla memoria. @@ -583,15 +583,15 @@ sono quelle rimosse, mentre quelle ``+`` sono quelle aggiunte. return ret; } -Da notare che :c:func:`spin_lock_irqsave()` disabiliterà le interruzioni +Da notare che spin_lock_irqsave() disabiliterà le interruzioni se erano attive, altrimenti non farà niente (quando siamo già in un contesto d'interruzione); dunque queste funzioni possono essere chiamante in sicurezza da qualsiasi contesto. -Sfortunatamente, :c:func:`cache_add()` invoca :c:func:`kmalloc()` con +Sfortunatamente, cache_add() invoca kmalloc() con l'opzione ``GFP_KERNEL`` che è permessa solo in contesto utente. Ho supposto -che :c:func:`cache_add()` venga chiamata dal contesto utente, altrimenti -questa opzione deve diventare un parametro di :c:func:`cache_add()`. 
+che cache_add() venga chiamata dal contesto utente, altrimenti +questa opzione deve diventare un parametro di cache_add(). Esporre gli oggetti al di fuori del file ---------------------------------------- @@ -610,7 +610,7 @@ Il secondo problema è il problema del ciclo di vita: se un'altra struttura mantiene un puntatore ad un oggetto, presumibilmente si aspetta che questo puntatore rimanga valido. Sfortunatamente, questo è garantito solo mentre si trattiene il *lock*, altrimenti qualcuno potrebbe chiamare -:c:func:`cache_delete()` o peggio, aggiungere un oggetto che riutilizza lo +cache_delete() o peggio, aggiungere un oggetto che riutilizza lo stesso indirizzo. Dato che c'è un solo *lock*, non potete trattenerlo a vita: altrimenti @@ -710,9 +710,9 @@ Ecco il codice:: } Abbiamo incapsulato il contatore di riferimenti nelle tipiche funzioni -di 'get' e 'put'. Ora possiamo ritornare l'oggetto da :c:func:`cache_find()` +di 'get' e 'put'. Ora possiamo ritornare l'oggetto da cache_find() col vantaggio che l'utente può dormire trattenendo l'oggetto (per esempio, -:c:func:`copy_to_user()` per copiare il nome verso lo spazio utente). +copy_to_user() per copiare il nome verso lo spazio utente). Un altro punto da notare è che ho detto che il contatore dovrebbe incrementarsi per ogni puntatore ad un oggetto: quindi il contatore di riferimenti è 1 @@ -727,8 +727,8 @@ Ci sono un certo numbero di operazioni atomiche definite in ``include/asm/atomic.h``: queste sono garantite come atomiche su qualsiasi processore del sistema, quindi non sono necessari i *lock*. In questo caso è più semplice rispetto all'uso degli spinlock, benché l'uso degli spinlock -sia più elegante per casi non banali. Le funzioni :c:func:`atomic_inc()` e -:c:func:`atomic_dec_and_test()` vengono usate al posto dei tipici operatori di +sia più elegante per casi non banali. 
Le funzioni atomic_inc() e +atomic_dec_and_test() vengono usate al posto dei tipici operatori di incremento e decremento, e i *lock* non sono più necessari per proteggere il contatore stesso. @@ -820,7 +820,7 @@ al nome di cambiare abbiamo tre possibilità: - Si può togliere static da ``cache_lock`` e dire agli utenti che devono trattenere il *lock* prima di modificare il nome di un oggetto. -- Si può fornire una funzione :c:func:`cache_obj_rename()` che prende il +- Si può fornire una funzione cache_obj_rename() che prende il *lock* e cambia il nome per conto del chiamante; si dirà poi agli utenti di usare questa funzione. @@ -878,11 +878,11 @@ Da notare che ho deciso che il contatore di popolarità dovesse essere protetto da ``cache_lock`` piuttosto che dal *lock* dell'oggetto; questo perché è logicamente parte dell'infrastruttura (come :c:type:`struct list_head ` nell'oggetto). In questo modo, -in :c:func:`__cache_add()`, non ho bisogno di trattenere il *lock* di ogni +in __cache_add(), non ho bisogno di trattenere il *lock* di ogni oggetto mentre si cerca il meno popolare. Ho anche deciso che il campo id è immutabile, quindi non ho bisogno di -trattenere il lock dell'oggetto quando si usa :c:func:`__cache_find()` +trattenere il lock dell'oggetto quando si usa __cache_find() per leggere questo campo; il *lock* dell'oggetto è usato solo dal chiamante che vuole leggere o scrivere il campo name. @@ -907,7 +907,7 @@ Questo è facile da diagnosticare: non è uno di quei problemi che ti tengono sveglio 5 notti a parlare da solo. Un caso un pochino più complesso; immaginate d'avere una spazio condiviso -fra un softirq ed il contesto utente. Se usate :c:func:`spin_lock()` per +fra un softirq ed il contesto utente. Se usate spin_lock() per proteggerlo, il contesto utente potrebbe essere interrotto da un softirq mentre trattiene il lock, da qui il softirq rimarrà in attesa attiva provando ad acquisire il *lock* già trattenuto nel contesto utente. 
@@ -1006,12 +1006,12 @@ potreste fare come segue:: spin_unlock_bh(&list_lock); Primo o poi, questo esploderà su un sistema multiprocessore perché un -temporizzatore potrebbe essere già partiro prima di :c:func:`spin_lock_bh()`, -e prenderà il *lock* solo dopo :c:func:`spin_unlock_bh()`, e cercherà +temporizzatore potrebbe essere già partiro prima di spin_lock_bh(), +e prenderà il *lock* solo dopo spin_unlock_bh(), e cercherà di eliminare il suo oggetto (che però è già stato eliminato). Questo può essere evitato controllando il valore di ritorno di -:c:func:`del_timer()`: se ritorna 1, il temporizzatore è stato già +del_timer(): se ritorna 1, il temporizzatore è stato già rimosso. Se 0, significa (in questo caso) che il temporizzatore è in esecuzione, quindi possiamo fare come segue:: @@ -1032,9 +1032,9 @@ esecuzione, quindi possiamo fare come segue:: spin_unlock_bh(&list_lock); Un altro problema è l'eliminazione dei temporizzatori che si riavviano -da soli (chiamando :c:func:`add_timer()` alla fine della loro esecuzione). +da soli (chiamando add_timer() alla fine della loro esecuzione). Dato che questo è un problema abbastanza comune con una propensione -alle corse critiche, dovreste usare :c:func:`del_timer_sync()` +alle corse critiche, dovreste usare del_timer_sync() (``include/linux/timer.h``) per gestire questo caso. Questa ritorna il numero di volte che il temporizzatore è stato interrotto prima che fosse in grado di fermarlo senza che si riavviasse. @@ -1116,7 +1116,7 @@ chiamata ``list``:: wmb(); list->next = new; -La funzione :c:func:`wmb()` è una barriera di sincronizzazione delle +La funzione wmb() è una barriera di sincronizzazione delle scritture. 
Questa garantisce che la prima operazione (impostare l'elemento ``next`` del nuovo elemento) venga completata e vista da tutti i processori prima che venga eseguita la seconda operazione (che sarebbe quella di mettere @@ -1127,7 +1127,7 @@ completamente il nuovo elemento; oppure che lo vedano correttamente e quindi il puntatore ``next`` deve puntare al resto della lista. Fortunatamente, c'è una funzione che fa questa operazione sulle liste -:c:type:`struct list_head `: :c:func:`list_add_rcu()` +:c:type:`struct list_head `: list_add_rcu() (``include/linux/list.h``). Rimuovere un elemento dalla lista è anche più facile: sostituiamo il puntatore @@ -1138,7 +1138,7 @@ l'elemento o lo salteranno. list->next = old->next; -La funzione :c:func:`list_del_rcu()` (``include/linux/list.h``) fa esattamente +La funzione list_del_rcu() (``include/linux/list.h``) fa esattamente questo (la versione normale corrompe il vecchio oggetto, e non vogliamo che accada). @@ -1146,9 +1146,9 @@ Anche i lettori devono stare attenti: alcuni processori potrebbero leggere attraverso il puntatore ``next`` il contenuto dell'elemento successivo troppo presto, ma non accorgersi che il contenuto caricato è sbagliato quando il puntatore ``next`` viene modificato alla loro spalle. Ancora una volta -c'è una funzione che viene in vostro aiuto :c:func:`list_for_each_entry_rcu()` +c'è una funzione che viene in vostro aiuto list_for_each_entry_rcu() (``include/linux/list.h``). Ovviamente, gli scrittori possono usare -:c:func:`list_for_each_entry()` dato che non ci possono essere due scrittori +list_for_each_entry() dato che non ci possono essere due scrittori in contemporanea. Il nostro ultimo dilemma è il seguente: quando possiamo realmente distruggere @@ -1156,15 +1156,15 @@ l'elemento rimosso? Ricordate, un lettore potrebbe aver avuto accesso a questo elemento proprio ora: se eliminiamo questo elemento ed il puntatore ``next`` cambia, il lettore salterà direttamente nella spazzatura e scoppierà. 
Dobbiamo aspettare finché tutti i lettori che stanno attraversando la lista abbiano -finito. Utilizziamo :c:func:`call_rcu()` per registrare una funzione di +finito. Utilizziamo call_rcu() per registrare una funzione di richiamo che distrugga l'oggetto quando tutti i lettori correnti hanno terminato. In alternative, potrebbe essere usata la funzione -:c:func:`synchronize_rcu()` che blocca l'esecuzione finché tutti i lettori +synchronize_rcu() che blocca l'esecuzione finché tutti i lettori non terminano di ispezionare la lista. Ma come fa l'RCU a sapere quando i lettori sono finiti? Il meccanismo è il seguente: innanzi tutto i lettori accedono alla lista solo fra la coppia -:c:func:`rcu_read_lock()`/:c:func:`rcu_read_unlock()` che disabilita la +rcu_read_lock()/rcu_read_unlock() che disabilita la prelazione così che i lettori non vengano sospesi mentre stanno leggendo la lista. @@ -1253,12 +1253,12 @@ codice RCU è un po' più ottimizzato di così, ma questa è l'idea di fondo. } Da notare che i lettori modificano il campo popularity nella funzione -:c:func:`__cache_find()`, e ora non trattiene alcun *lock*. Una soluzione +__cache_find(), e ora non trattiene alcun *lock*. Una soluzione potrebbe essere quella di rendere la variabile ``atomic_t``, ma per l'uso che ne abbiamo fatto qui, non ci interessano queste corse critiche perché un risultato approssimativo è comunque accettabile, quindi non l'ho cambiato. -Il risultato è che la funzione :c:func:`cache_find()` non ha bisogno di alcuna +Il risultato è che la funzione cache_find() non ha bisogno di alcuna sincronizzazione con le altre funzioni, quindi è veloce su un sistema multi-processore tanto quanto lo sarebbe su un sistema mono-processore. @@ -1271,9 +1271,9 @@ riferimenti. 
Ora, dato che il '*lock* di lettura' di un RCU non fa altro che disabilitare la prelazione, un chiamante che ha sempre la prelazione disabilitata fra le -chiamate :c:func:`cache_find()` e :c:func:`object_put()` non necessita +chiamate cache_find() e object_put() non necessita di incrementare e decrementare il contatore di riferimenti. Potremmo -esporre la funzione :c:func:`__cache_find()` dichiarandola non-static, +esporre la funzione __cache_find() dichiarandola non-static, e quel chiamante potrebbe usare direttamente questa funzione. Il beneficio qui sta nel fatto che il contatore di riferimenti no @@ -1293,10 +1293,10 @@ singolo contatore. Facile e pulito. Se questo dovesse essere troppo lento (solitamente non lo è, ma se avete dimostrato che lo è devvero), potreste usare un contatore per ogni processore e quindi non sarebbe più necessaria la mutua esclusione. Vedere -:c:func:`DEFINE_PER_CPU()`, :c:func:`get_cpu_var()` e :c:func:`put_cpu_var()` +DEFINE_PER_CPU(), get_cpu_var() e put_cpu_var() (``include/linux/percpu.h``). -Il tipo di dato ``local_t``, la funzione :c:func:`cpu_local_inc()` e tutte +Il tipo di dato ``local_t``, la funzione cpu_local_inc() e tutte le altre funzioni associate, sono di particolare utilità per semplici contatori per-processore; su alcune architetture sono anche più efficienti (``include/asm/local.h``). @@ -1324,11 +1324,11 @@ da un'interruzione software. Il gestore d'interruzione non utilizza alcun enable_irq(irq); spin_unlock(&lock); -La funzione :c:func:`disable_irq()` impedisce al gestore d'interruzioni +La funzione disable_irq() impedisce al gestore d'interruzioni d'essere eseguito (e aspetta che finisca nel caso fosse in esecuzione su un altro processore). Lo spinlock, invece, previene accessi simultanei. Naturalmente, questo è più lento della semplice chiamata -:c:func:`spin_lock_irq()`, quindi ha senso solo se questo genere di accesso +spin_lock_irq(), quindi ha senso solo se questo genere di accesso è estremamente raro. .. 
_`it_sleeping-things`: @@ -1336,7 +1336,7 @@ Naturalmente, questo è più lento della semplice chiamata Quali funzioni possono essere chiamate in modo sicuro dalle interruzioni? ========================================================================= -Molte funzioni del kernel dormono (in sostanza, chiamano ``schedule()``) +Molte funzioni del kernel dormono (in sostanza, chiamano schedule()) direttamente od indirettamente: non potete chiamarle se trattenere uno spinlock o avete la prelazione disabilitata, mai. Questo significa che dovete necessariamente essere nel contesto utente: chiamarle da un @@ -1354,23 +1354,23 @@ dormire. - Accessi allo spazio utente: - - :c:func:`copy_from_user()` + - copy_from_user() - - :c:func:`copy_to_user()` + - copy_to_user() - - :c:func:`get_user()` + - get_user() - - :c:func:`put_user()` + - put_user() -- :c:func:`kmalloc(GFP_KERNEL) ` +- kmalloc(GFP_KERNEL) ` -- :c:func:`mutex_lock_interruptible()` and - :c:func:`mutex_lock()` +- mutex_lock_interruptible() and + mutex_lock() - C'è anche :c:func:`mutex_trylock()` che però non dorme. + C'è anche mutex_trylock() che però non dorme. Comunque, non deve essere usata in un contesto d'interruzione dato che la sua implementazione non è sicura in quel contesto. - Anche :c:func:`mutex_unlock()` non dorme mai. Non può comunque essere + Anche mutex_unlock() non dorme mai. Non può comunque essere usata in un contesto d'interruzione perché un mutex deve essere rilasciato dallo stesso processo che l'ha acquisito. @@ -1380,11 +1380,11 @@ Alcune funzioni che non dormono Alcune funzioni possono essere chiamate tranquillamente da qualsiasi contesto, o trattenendo un qualsiasi *lock*. 
-- :c:func:`printk()` +- printk() -- :c:func:`kfree()` +- kfree() -- :c:func:`add_timer()` e :c:func:`del_timer()` +- add_timer() e del_timer() Riferimento per l'API dei Mutex =============================== @@ -1444,14 +1444,14 @@ prelazione bh Bottom Half: per ragioni storiche, le funzioni che contengono '_bh' nel loro nome ora si riferiscono a qualsiasi interruzione software; per esempio, - :c:func:`spin_lock_bh()` blocca qualsiasi interuzione software sul processore + spin_lock_bh() blocca qualsiasi interuzione software sul processore corrente. I *Bottom Halves* sono deprecati, e probabilmente verranno sostituiti dai tasklet. In un dato momento potrà esserci solo un *bottom half* in esecuzione. contesto d'interruzione Non è il contesto utente: qui si processano le interruzioni hardware e - software. La macro :c:func:`in_interrupt()` ritorna vero. + software. La macro in_interrupt() ritorna vero. contesto utente Il kernel che esegue qualcosa per conto di un particolare processo (per @@ -1461,12 +1461,12 @@ contesto utente che hardware. interruzione hardware - Richiesta di interruzione hardware. :c:func:`in_irq()` ritorna vero in un + Richiesta di interruzione hardware. in_irq() ritorna vero in un gestore d'interruzioni hardware. interruzione software / softirq - Gestore di interruzioni software: :c:func:`in_irq()` ritorna falso; - :c:func:`in_softirq()` ritorna vero. I tasklet e le softirq sono entrambi + Gestore di interruzioni software: in_irq() ritorna falso; + in_softirq() ritorna vero. I tasklet e le softirq sono entrambi considerati 'interruzioni software'. 
In soldoni, un softirq è uno delle 32 interruzioni software che possono diff --git a/Documentation/translations/it_IT/process/2.Process.rst b/Documentation/translations/it_IT/process/2.Process.rst index 9af4d01617c4..30dc172f06b0 100644 --- a/Documentation/translations/it_IT/process/2.Process.rst +++ b/Documentation/translations/it_IT/process/2.Process.rst @@ -23,18 +23,18 @@ ogni due o tre mesi viene effettuata un rilascio importante del kernel. I rilasci più recenti sono stati: ====== ================= - 4.11 Aprile 30, 2017 - 4.12 Luglio 2, 2017 - 4.13 Settembre 3, 2017 - 4.14 Novembre 12, 2017 - 4.15 Gennaio 28, 2018 - 4.16 Aprile 1, 2018 + 5.0 3 marzo, 2019 + 5.1 5 maggio, 2019 + 5.2 7 luglio, 2019 + 5.3 15 settembre, 2019 + 5.4 24 novembre, 2019 + 5.5 6 gennaio, 2020 ====== ================= -Ciascun rilascio 4.x è un importante rilascio del kernel con nuove +Ciascun rilascio 5.x è un importante rilascio del kernel con nuove funzionalità, modifiche interne dell'API, e molto altro. Un tipico -rilascio 4.x contiene quasi 13,000 gruppi di modifiche con ulteriori -modifiche a parecchie migliaia di linee di codice. La 4.x. è pertanto la +rilascio contiene quasi 13,000 gruppi di modifiche con ulteriori +modifiche a parecchie migliaia di linee di codice. La 5.x. è pertanto la linea di confine nello sviluppo del kernel Linux; il kernel utilizza un sistema di sviluppo continuo che integra costantemente nuove importanti modifiche. @@ -55,8 +55,8 @@ verrà descritto dettagliatamente più avanti). La finestra di inclusione resta attiva approssimativamente per due settimane. Al termine di questo periodo, Linus Torvald dichiarerà che la finestra è chiusa e rilascerà il primo degli "rc" del kernel. -Per il kernel che è destinato ad essere 2.6.40, per esempio, il rilascio -che emerge al termine della finestra d'inclusione si chiamerà 2.6.40-rc1. 
+Per il kernel che è destinato ad essere 5.6, per esempio, il rilascio +che emerge al termine della finestra d'inclusione si chiamerà 5.6-rc1. Questo rilascio indica che il momento di aggiungere nuovi componenti è passato, e che è iniziato il periodo di stabilizzazione del prossimo kernel. @@ -76,22 +76,23 @@ Mentre le correzioni si aprono la loro strada all'interno del ramo principale, il ritmo delle modifiche rallenta col tempo. Linus rilascia un nuovo kernel -rc circa una volta alla settimana; e ne usciranno circa 6 o 9 prima che il kernel venga considerato sufficientemente stabile e che il rilascio -finale 2.6.x venga fatto. A quel punto tutto il processo ricomincerà. +finale venga fatto. A quel punto tutto il processo ricomincerà. -Esempio: ecco com'è andato il ciclo di sviluppo della versione 4.16 +Esempio: ecco com'è andato il ciclo di sviluppo della versione 5.4 (tutte le date si collocano nel 2018) ============== ======================================= - Gennaio 28 4.15 rilascio stabile - Febbraio 11 4.16-rc1, finestra di inclusione chiusa - Febbraio 18 4.16-rc2 - Febbraio 25 4.16-rc3 - Marzo 4 4.16-rc4 - Marzo 11 4.16-rc5 - Marzo 18 4.16-rc6 - Marzo 25 4.16-rc7 - Aprile 1 4.17 rilascio stabile + 15 settembre 5.3 rilascio stabile + 30 settembre 5.4-rc1, finestra di inclusione chiusa + 6 ottobre 5.4-rc2 + 13 ottobre 5.4-rc3 + 20 ottobre 5.4-rc4 + 27 ottobre 5.4-rc5 + 3 novembre 5.4-rc6 + 10 novembre 5.4-rc7 + 17 novembre 5.4-rc8 + 24 novembre 5.4 rilascio stabile ============== ======================================= In che modo gli sviluppatori decidono quando chiudere il ciclo di sviluppo e @@ -108,43 +109,44 @@ tipo di perfezione difficilmente viene raggiunta; esistono troppe variabili in un progetto di questa portata. Arriva un punto dove ritardare il rilascio finale peggiora la situazione; la quantità di modifiche in attesa della prossima finestra di inclusione crescerà enormemente, creando ancor più -regressioni al giro successivo. 
Quindi molti kernel 4.x escono con una +regressioni al giro successivo. Quindi molti kernel 5.x escono con una manciata di regressioni delle quali, si spera, nessuna è grave. Una volta che un rilascio stabile è fatto, il suo costante mantenimento è affidato al "squadra stabilità", attualmente composta da Greg Kroah-Hartman. Questa squadra rilascia occasionalmente degli aggiornamenti relativi al -rilascio stabile usando la numerazione 4.x.y. Per essere presa in +rilascio stabile usando la numerazione 5.x.y. Per essere presa in considerazione per un rilascio d'aggiornamento, una modifica deve: (1) correggere un baco importante (2) essere già inserita nel ramo principale per il prossimo sviluppo del kernel. Solitamente, passato il loro rilascio iniziale, i kernel ricevono aggiornamenti per più di un ciclo di sviluppo. -Quindi, per esempio, la storia del kernel 4.13 appare così: +Quindi, per esempio, la storia del kernel 5.2 appare così (anno 2019): ============== =============================== - Settembre 3 4.13 rilascio stabile - Settembre 13 4.13.1 - Settembre 20 4.13.2 - Settembre 27 4.13.3 - Ottobre 5 4.13.4 - Ottobre 12 4.13.5 + 15 settembre 5.2 rilascio stabile FIXME settembre è sbagliato + 14 luglio 5.2.1 + 21 luglio 5.2.2 + 26 luglio 5.2.3 + 28 luglio 5.2.4 + 31 luglio 5.2.5 ... ... - Novembre 24 4.13.16 + 11 ottobre 5.2.21 ============== =============================== -La 4.13.16 fu l'aggiornamento finale per la versione 4.13. +La 5.2.21 fu l'aggiornamento finale per la versione 5.2. Alcuni kernel sono destinati ad essere kernel a "lungo termine"; questi riceveranno assistenza per un lungo periodo di tempo. 
Al momento in cui scriviamo, i manutentori dei kernel stabili a lungo termine sono: - ====== ====================== ========================================== - 3.16 Ben Hutchings (kernel stabile molto più a lungo termine) - 4.1 Sasha Levin - 4.4 Greg Kroah-Hartman (kernel stabile molto più a lungo termine) - 4.9 Greg Kroah-Hartman - 4.14 Greg Kroah-Hartman - ====== ====================== ========================================== + ====== ================================ ========================================== + 3.16 Ben Hutchings (kernel stabile molto più a lungo termine) + 4.4 Greg Kroah-Hartman e Sasha Levin (kernel stabile molto più a lungo termine) + 4.9 Greg Kroah-Hartman e Sasha Levin + 4.14 Greg Kroah-Hartman e Sasha Levin + 4.19 Greg Kroah-Hartman e Sasha Levin + 5.4 Greg Kroah-Hartman e Sasha Levin + ====== ================================ ========================================== Questa selezione di kernel di lungo periodo sono puramente dovuti ai loro @@ -229,12 +231,13 @@ Come le modifiche finiscono nel Kernel -------------------------------------- Esiste una sola persona che può inserire le patch nel repositorio principale -del kernel: Linus Torvalds. Ma, di tutte le 9500 patch che entrarono nella -versione 2.6.38 del kernel, solo 112 (circa l'1,3%) furono scelte direttamente -da Linus in persona. Il progetto del kernel è cresciuto fino a raggiungere -una dimensione tale per cui un singolo sviluppatore non può controllare e -selezionare indipendentemente ogni modifica senza essere supportato. -La via scelta dagli sviluppatori per indirizzare tale crescita è stata quella +del kernel: Linus Torvalds. Ma, per esempio, di tutte le 9500 patch +che entrarono nella versione 2.6.38 del kernel, solo 112 (circa +l'1,3%) furono scelte direttamente da Linus in persona. 
Il progetto +del kernel è cresciuto fino a raggiungere una dimensione tale per cui +un singolo sviluppatore non può controllare e selezionare +indipendentemente ogni modifica senza essere supportato. La via +scelta dagli sviluppatori per indirizzare tale crescita è stata quella di utilizzare un sistema di "sottotenenti" basato sulla fiducia. Il codice base del kernel è spezzato in una serie si sottosistemi: rete, diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst index 8725f2b9e960..6f4f85832dee 100644 --- a/Documentation/translations/it_IT/process/coding-style.rst +++ b/Documentation/translations/it_IT/process/coding-style.rst @@ -313,7 +313,7 @@ che conta gli utenti attivi, dovreste chiamarla ``count_active_users()`` o qualcosa di simile, **non** dovreste chiamarla ``cntusr()``. Codificare il tipo di funzione nel suo nome (quella cosa chiamata notazione -ungherese) fa male al cervello - il compilatore conosce comunque il tipo e +ungherese) è stupido - il compilatore conosce comunque il tipo e può verificarli, e inoltre confonde i programmatori. Non c'è da sorprendersi che MicroSoft faccia programmi bacati. @@ -825,8 +825,8 @@ linguaggio assembler. Agli sviluppatori del kernel piace essere visti come dotti. Tenete un occhio di riguardo per l'ortografia e farete una belle figura. In inglese, evitate -l'uso di parole mozzate come ``dont``: usate ``do not`` oppure ``don't``. -Scrivete messaggi concisi, chiari, e inequivocabili. +l'uso incorretto di abbreviazioni come ``dont``: usate ``do not`` oppure +``don't``. Scrivete messaggi concisi, chiari, e inequivocabili. I messaggi del kernel non devono terminare con un punto fermo. 
diff --git a/Documentation/translations/it_IT/process/deprecated.rst b/Documentation/translations/it_IT/process/deprecated.rst index 776f26732a94..e108eaf82cf6 100644 --- a/Documentation/translations/it_IT/process/deprecated.rst +++ b/Documentation/translations/it_IT/process/deprecated.rst @@ -34,6 +34,33 @@ interfaccia come 'vecchia', questa non è una soluzione completa. L'interfaccia deve essere rimossa dal kernel, o aggiunta a questo documento per scoraggiarne l'uso. +BUG() e BUG_ON() +---------------- +Al loro posto usate WARN() e WARN_ON() per gestire le +condizioni "impossibili" e gestitele come se fosse possibile farlo. +Nonostante le funzioni della famiglia BUG() siano state progettate +per asserire "situazioni impossibili" e interrompere in sicurezza un +thread del kernel, queste si sono rivelate essere troppo rischiose +(per esempio, in quale ordine rilasciare i *lock*? Ci sono stati che +sono stati ripristinati?). Molto spesso l'uso di BUG() +destabilizza il sistema o lo corrompe del tutto, il che rende +impossibile un'attività di debug o anche solo leggere un rapporto +circa l'errore. Linus ha un'opinione molto critica al riguardo: +`email 1 +`_, +`email 2 +`_ + +Tenete presente che la famiglia di funzioni WARN() dovrebbe essere +usato solo per situazioni che si suppone siano "impossibili". Se +volete avvisare gli utenti riguardo a qualcosa di possibile anche se +indesiderato, usare le funzioni della famiglia pr_warn(). Chi +amministra il sistema potrebbe aver attivato l'opzione sysctl +*panic_on_warn* per essere sicuri che il sistema smetta di funzionare +in caso si verifichino delle condizioni "inaspettate". 
(per esempio, +date un'occhiata al questo `commit +`_) + Calcoli codificati negli argomenti di un allocatore ---------------------------------------------------- Il calcolo dinamico delle dimensioni (specialmente le moltiplicazioni) non @@ -68,52 +95,81 @@ Invece, usate la seguente funzione:: header = kzalloc(struct_size(header, item, count), GFP_KERNEL); -Per maggiori dettagli fate riferimento a :c:func:`array_size`, -:c:func:`array3_size`, e :c:func:`struct_size`, così come la famiglia di -funzioni :c:func:`check_add_overflow` e :c:func:`check_mul_overflow`. +Per maggiori dettagli fate riferimento a array_size(), +array3_size(), e struct_size(), così come la famiglia di +funzioni check_add_overflow() e check_mul_overflow(). simple_strtol(), simple_strtoll(), simple_strtoul(), simple_strtoull() ---------------------------------------------------------------------- -Le funzioni :c:func:`simple_strtol`, :c:func:`simple_strtoll`, -:c:func:`simple_strtoul`, e :c:func:`simple_strtoull` ignorano volutamente +Le funzioni simple_strtol(), simple_strtoll(), +simple_strtoul(), e simple_strtoull() ignorano volutamente i possibili overflow, e questo può portare il chiamante a generare risultati -inaspettati. Le rispettive funzioni :c:func:`kstrtol`, :c:func:`kstrtoll`, -:c:func:`kstrtoul`, e :c:func:`kstrtoull` sono da considerarsi le corrette +inaspettati. Le rispettive funzioni kstrtol(), kstrtoll(), +kstrtoul(), e kstrtoull() sono da considerarsi le corrette sostitute; tuttavia va notato che queste richiedono che la stringa sia terminata con il carattere NUL o quello di nuova riga. strcpy() -------- -La funzione :c:func:`strcpy` non fa controlli agli estremi del buffer +La funzione strcpy() non fa controlli agli estremi del buffer di destinazione. Questo può portare ad un overflow oltre i limiti del buffer e generare svariati tipi di malfunzionamenti. 
Nonostante l'opzione `CONFIG_FORTIFY_SOURCE=y` e svariate opzioni del compilatore aiutano a ridurne il rischio, non c'è alcuna buona ragione per continuare ad usare -questa funzione. La versione sicura da usare è :c:func:`strscpy`. +questa funzione. La versione sicura da usare è strscpy(). strncpy() su stringe terminate con NUL -------------------------------------- -L'utilizzo di :c:func:`strncpy` non fornisce alcuna garanzia sul fatto che +L'utilizzo di strncpy() non fornisce alcuna garanzia sul fatto che il buffer di destinazione verrà terminato con il carattere NUL. Questo potrebbe portare a diversi overflow di lettura o altri malfunzionamenti causati, appunto, dalla mancanza del terminatore. Questa estende la terminazione nel buffer di destinazione quando la stringa d'origine è più corta; questo potrebbe portare ad una penalizzazione delle prestazioni per chi usa solo stringe terminate. La versione sicura da usare è -:c:func:`strscpy`. (chi usa :c:func:`strscpy` e necessita di estendere la -terminazione con NUL deve aggiungere una chiamata a :c:func:`memset`) +strscpy(). (chi usa strscpy() e necessita di estendere la +terminazione con NUL deve aggiungere una chiamata a memset()) -Se il chiamate no usa stringhe terminate con NUL, allore :c:func:`strncpy()` +Se il chiamate no usa stringhe terminate con NUL, allore strncpy()() può continuare ad essere usata, ma i buffer di destinazione devono essere marchiati con l'attributo `__nonstring `_ per evitare avvisi durante la compilazione. strlcpy() --------- -La funzione :c:func:`strlcpy`, per prima cosa, legge interamente il buffer di +La funzione strlcpy(), per prima cosa, legge interamente il buffer di origine, magari leggendo più di quanto verrà effettivamente copiato. Questo è inefficiente e può portare a overflow di lettura quando la stringa non è -terminata con NUL. La versione sicura da usare è :c:func:`strscpy`. +terminata con NUL. La versione sicura da usare è strscpy(). 
+ +Segnaposto %p nella stringa di formato +-------------------------------------- + +Tradizionalmente, l'uso del segnaposto "%p" nella stringa di formato +esponne un indirizzo di memoria in dmesg, proc, sysfs, eccetera. Per +evitare che questi indirizzi vengano sfruttati da malintenzionati, +tutto gli usi di "%p" nel kernel rappresentano l'hash dell'indirizzo, +rendendolo di fatto inutilizzabile. Nuovi usi di "%p" non dovrebbero +essere aggiunti al kernel. Per una rappresentazione testuale di un +indirizzo usate "%pS", l'output è migliore perché mostrerà il nome del +simbolo. Per tutto il resto, semplicemente non usate "%p". + +Parafrasando la `guida +`_ +di Linus: + +- Se il valore hash di "%p" è inutile, chiediti se il puntatore stesso + è importante. Forse dovrebbe essere rimosso del tutto? +- Se credi davvero che il vero valore del puntatore sia importante, + perché alcuni stati del sistema o i livelli di privilegi di un + utente sono considerati "special"? Se pensi di poterlo giustificare + (in un commento e nel messaggio del commit) abbastanza bene da + affrontare il giudizio di Linus, allora forse potrai usare "%px", + assicurandosi anche di averne il permesso. + +Infine, sappi che un cambio in favore di "%p" con hash `non verrà +accettato +`_. Vettori a dimensione variabile (VLA) ------------------------------------ @@ -127,3 +183,47 @@ Questo può portare a dei malfunzionamenti, potrebbe sovrascrivere dati importanti alla fine dello stack (quando il kernel è compilato senza `CONFIG_THREAD_INFO_IN_TASK=y`), o sovrascrivere un pezzo di memoria adiacente allo stack (quando il kernel è compilato senza `CONFIG_VMAP_STACK=y`). + +Salto implicito nell'istruzione switch-case +------------------------------------------- + +Il linguaggio C permette ai casi di un'istruzione `switch` di saltare al +prossimo caso quando l'istruzione "break" viene omessa alla fine del caso +corrente. 
Tuttavia questo rende il codice ambiguo perché non è sempre ovvio se +l'istruzione "break" viene omessa intenzionalmente o è un baco. Per esempio, +osservando il seguente pezzo di codice non è chiaro se lo stato +`STATE_ONE` è stato progettato apposta per eseguire anche `STATE_TWO`:: + + switch (value) { + case STATE_ONE: + do_something(); + case STATE_TWO: + do_other(); + break; + default: + WARN("unknown state"); + } + +Dato che c'è stata una lunga lista di problemi `dovuti alla mancanza dell'istruzione +"break" `_, oggigiorno non +permettiamo più che vi sia un "salto implicito" (*fall-through*). Per +identificare un salto implicito intenzionale abbiamo adottato la pseudo +parola chiave 'fallthrough' che viene espansa nell'estensione di gcc +`__attribute__((fallthrough))` `Statement Attributes +`_. +(Quando la sintassi C17/C18 `[[fallthrough]]` sarà più comunemente +supportata dai compilatori C, analizzatori statici, e dagli IDE, +allora potremo usare quella sintassi per la pseudo parola chiave) + +Quando la sintassi [[fallthrough]] sarà più comunemente supportata dai +compilatori, analizzatori statici, e ambienti di sviluppo IDE, +allora potremo usarla anche noi. + +Ne consegue che tutti i blocchi switch/case devono finire in uno dei seguenti +modi: + +* ``break;`` +* ``fallthrough;`` +* ``continue;`` +* ``goto