581 files changed, 9695 insertions, 3088 deletions
diff --git a/.gitignore b/.gitignore
index 7587ef56b92d..8f5422cba6e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,7 @@
 *.lz4
 *.lzma
 *.lzo
+*.mod
 *.mod.c
 *.o
 *.o.*
diff --git a/.mailmap b/.mailmap
index 0fef932de3db..acba1a6163f1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -98,6 +98,7 @@ Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
 Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
 <javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
 Jean Tourrilhes <jt@hpl.hp.com>
+<jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
 Jeff Garzik <jgarzik@pretzel.yyz.us>
 Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
 Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
@@ -116,6 +117,7 @@ John Stultz <johnstul@us.ibm.com>
 Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
+Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
 Kay Sievers <kay.sievers@vrfy.org>
 Kenneth W Chen <kenneth.w.chen@intel.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
@@ -132,6 +134,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
 Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
+Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Mark Brown <broonie@sirena.org.uk>
 Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
index 83db42092935..e5d450df06b4 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -403,7 +403,7 @@ That is, the recovery API only requires that:
 .. note::
 
    Implementation details for the powerpc platform are discussed in
-   the file Documentation/powerpc/eeh-pci-error-recovery.txt
+   the file Documentation/powerpc/eeh-pci-error-recovery.rst
 
    As of this writing, there is a growing list of device drivers with
    patches implementing error recovery. Not all of these patches are in
@@ -422,3 +422,6 @@ That is, the recovery API only requires that:
    - drivers/net/cxgb3
    - drivers/net/s2io.c
    - drivers/net/qlge
+
+The End
+-------
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 8151f0195f76..23f115dc87cf 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -1,7 +1,7 @@
 Using hlist_nulls to protect read-mostly linked lists and
 objects using SLAB_TYPESAFE_BY_RCU allocations.
 
-Please read the basics in Documentation/RCU/listRCU.txt
+Please read the basics in Documentation/RCU/listRCU.rst
 
 Using special makers (called 'nulls') is a convenient way
 to solve following problem :
diff --git a/Documentation/admin-guide/conf.py b/Documentation/admin-guide/conf.py
deleted file mode 100644
index 86f738953799..000000000000
--- a/Documentation/admin-guide/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel User Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'linux-user.tex', 'Linux Kernel User Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 46b826fcb5ad..7ccd158b3894 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2545,7 +2545,7 @@
 			mem_encrypt=on:		Activate SME
 			mem_encrypt=off:	Do not activate SME
 
-			Refer to Documentation/virtual/kvm/amd-memory-encryption.rst
+			Refer to Documentation/virt/kvm/amd-memory-encryption.rst
 			for details on when memory encryption can be activated.
 
 	mem_sleep_default=	[SUSPEND] Default system suspend mode:
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 7ab93a8404b9..bd5714547cee 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -53,7 +53,7 @@ disabled, there is ``khugepaged`` daemon that scans memory and
 collapses sequences of basic pages into huge pages.
 
 The THP behaviour is controlled via :ref:`sysfs <thp_sysfs>`
-interface and using madivse(2) and prctl(2) system calls.
+interface and using madvise(2) and prctl(2) system calls.
 
 Transparent Hugepage Support maximizes the usefulness of free memory
 if compared to the reservation approach of hugetlbfs by allowing all
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 3b2397bcb565..a8fe845832bc 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -16,6 +16,8 @@ import sys
 import os
 import sphinx
 
+from subprocess import check_output
+
 # Get Sphinx version
 major, minor, patch = sphinx.version_info[:3]
 
@@ -276,10 +278,21 @@ latex_elements = {
         \\setsansfont{DejaVu Sans}
         \\setromanfont{DejaVu Serif}
         \\setmonofont{DejaVu Sans Mono}
-
      '''
 }
 
+# At least one book (translations) may have Asian characters
+# which are only displayed if xeCJK is used
+
+cjk_cmd = check_output(['fc-list', '--format="%{family[0]}\n"']).decode('utf-8', 'ignore')
+if cjk_cmd.find("Noto Sans CJK SC") >= 0:
+    print ("enabling CJK for LaTeX builder")
+    latex_elements['preamble']  += '''
+	% This is needed for translations
+        \\usepackage{xeCJK}
+        \\setCJKmainfont{Noto Sans CJK SC}
+     '''
+
 # Fix reference escape troubles with Sphinx 1.4.x
 if major == 1 and minor > 3:
     latex_elements['preamble']  += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
@@ -410,6 +423,21 @@ latex_documents = [
      'The kernel development community', 'manual'),
 ]
 
+# Add all other index files from Documentation/ subdirectories
+for fn in os.listdir('.'):
+    doc = os.path.join(fn, "index")
+    if os.path.exists(doc + ".rst"):
+        has = False
+        for l in latex_documents:
+            if l[0] == doc:
+                has = True
+                break
+        if not has:
+            latex_documents.append((doc, fn + '.tex',
+                                    'Linux %s Documentation' % fn.capitalize(),
+                                    'The kernel development community',
+                                    'manual'))
+
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
 #latex_logo = None
diff --git a/Documentation/core-api/conf.py b/Documentation/core-api/conf.py
deleted file mode 100644
index db1f7659f3da..000000000000
--- a/Documentation/core-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Core-API Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'core-api.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/crypto/conf.py b/Documentation/crypto/conf.py
deleted file mode 100644
index 4335d251ddf3..000000000000
--- a/Documentation/crypto/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Crypto API'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'crypto-api.tex', 'Linux Kernel Crypto API manual',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/dev-tools/conf.py b/Documentation/dev-tools/conf.py
deleted file mode 100644
index 7faafa3f7888..000000000000
--- a/Documentation/dev-tools/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Development tools for the kernel"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'dev-tools.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt
index 326f29b270ad..2d325bed37e5 100644
--- a/Documentation/devicetree/bindings/arm/idle-states.txt
+++ b/Documentation/devicetree/bindings/arm/idle-states.txt
@@ -703,4 +703,4 @@ cpus {
     https://www.devicetree.org/specifications/
 
 [6] ARM Linux Kernel documentation - Booting AArch64 Linux
-    Documentation/arm64/booting.txt
+    Documentation/arm64/booting.rst
diff --git a/Documentation/doc-guide/conf.py b/Documentation/doc-guide/conf.py
deleted file mode 100644
index fd3731182d5a..000000000000
--- a/Documentation/doc-guide/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Documentation Guide'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'kernel-doc-guide.tex', 'Linux Kernel Documentation Guide',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 5eba889ea84d..9f4392876099 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -30,6 +30,7 @@
 *.lzo
 *.mo
 *.moc
+*.mod
 *.mod.c
 *.o
 *.o.*
diff --git a/Documentation/driver-api/80211/conf.py b/Documentation/driver-api/80211/conf.py
deleted file mode 100644
index 4424b4b0b9c3..000000000000
--- a/Documentation/driver-api/80211/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux 802.11 Driver Developer's Guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', '80211.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/driver-api/conf.py b/Documentation/driver-api/conf.py
deleted file mode 100644
index 202726d20088..000000000000
--- a/Documentation/driver-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux driver implementer's API guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'driver-api.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/driver-api/generic-counter.rst b/Documentation/driver-api/generic-counter.rst
index 0c161b1a3be6..8382f01a53e3 100644
--- a/Documentation/driver-api/generic-counter.rst
+++ b/Documentation/driver-api/generic-counter.rst
@@ -233,7 +233,7 @@ Userspace Interface
 Several sysfs attributes are generated by the Generic Counter interface,
 and reside under the /sys/bus/counter/devices/counterX directory, where
 counterX refers to the respective counter device. Please see
-Documentation/ABI/testing/sys-bus-counter-generic-sysfs for detailed
+Documentation/ABI/testing/sysfs-bus-counter for detailed
 information on each Generic Counter interface sysfs attribute.
 
 Through these sysfs attributes, programs and scripts may interact with
@@ -325,7 +325,7 @@ sysfs attributes, where Y is the unique ID of the respective Count:
 
 For a more detailed breakdown of the available Generic Counter interface
 sysfs attributes, please refer to the
-Documentation/ABI/testing/sys-bus-counter file.
+Documentation/ABI/testing/sysfs-bus-counter file.
 
 The Signals and Counts associated with the Counter device are registered
 to the system as well by the counter_register function. The
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst
index 074a423c853c..87d1372da879 100644
--- a/Documentation/driver-api/ntb.rst
+++ b/Documentation/driver-api/ntb.rst
@@ -200,6 +200,33 @@ Debugfs Files:
 	This file is used to read and write peer scratchpads.  See
 	*spad* for details.
 
+NTB MSI Test Client (ntb\_msi\_test)
+------------------------------------
+
+The MSI test client serves to test and debug the MSI library which
+allows for passing MSI interrupts across NTB memory windows. The
+test client is interacted with through the debugfs filesystem:
+
+* *debugfs*/ntb\_tool/*hw*/
+	A directory in debugfs will be created for each
+	NTB device probed by the tool.  This directory is shortened to *hw*
+	below.
+* *hw*/port
+	This file describes the local port number
+* *hw*/irq*_occurrences
+	One occurrences file exists for each interrupt and, when read,
+	returns the number of times the interrupt has been triggered.
+* *hw*/peer*/port
+	This file describes the port number for each peer
+* *hw*/peer*/count
+	This file describes the number of interrupts that can be
+	triggered on each peer
+* *hw*/peer*/trigger
+	Writing an interrupt number (any number less than the value
+	specified in count) will trigger the interrupt on the
+	specified peer. That peer's interrupt's occurrence file
+	should be incremented.
+
 NTB Hardware Drivers
 ====================
 
diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst
index 457c3e0f86d6..8fc1ce0bb905 100644
--- a/Documentation/driver-api/phy/phy.rst
+++ b/Documentation/driver-api/phy/phy.rst
@@ -179,8 +179,8 @@ PHY Mappings
 
 In order to get reference to a PHY without help from DeviceTree, the framework
 offers lookups which can be compared to clkdev that allow clk structures to be
-bound to devices. A lookup can be made be made during runtime when a handle to
-the struct phy already exists.
+bound to devices. A lookup can be made during runtime when a handle to the
+struct phy already exists.
 
 The framework offers the following API for registering and unregistering the
 lookups::
diff --git a/Documentation/driver-api/pm/conf.py b/Documentation/driver-api/pm/conf.py
deleted file mode 100644
index a89fac11272f..000000000000
--- a/Documentation/driver-api/pm/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Device Power Management"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'pm.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/filesystems/conf.py b/Documentation/filesystems/conf.py
deleted file mode 100644
index ea44172af5c4..000000000000
--- a/Documentation/filesystems/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Filesystems API"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'filesystems.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 209672010fb4..6b7a41cfcaed 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -436,7 +436,7 @@ for the inode.  If d_make_root(inode) is passed a NULL inode it returns NULL
 and also requires no further error handling. Typical usage is:
 
 	inode = foofs_new_inode(....);
-	s->s_root = d_make_inode(inode);
+	s->s_root = d_make_root(inode);
 	if (!s->s_root)
 		/* Nothing needed for the inode cleanup */
 		return -ENOMEM;
diff --git a/Documentation/gpu/conf.py b/Documentation/gpu/conf.py
deleted file mode 100644
index 1757b040fb32..000000000000
--- a/Documentation/gpu/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux GPU Driver Developer's Guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'gpu.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/hwmon/k8temp.rst b/Documentation/hwmon/k8temp.rst
index 72da12aa17e5..fe9109521056 100644
--- a/Documentation/hwmon/k8temp.rst
+++ b/Documentation/hwmon/k8temp.rst
@@ -9,7 +9,7 @@ Supported chips:
 
     Addresses scanned: PCI space
 
-    Datasheet: http://support.amd.com/us/Processor_TechDocs/32559.pdf
+    Datasheet: http://www.amd.com/system/files/TechDocs/32559.pdf
 
 Author: Rudolf Marek
 
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 70ae148ec980..2df5a3da563c 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -111,9 +111,11 @@ needed).
    netlabel/index
    networking/index
    pcmcia/index
+   power/index
    target/index
    timers/index
    watchdog/index
+   virtual/index
    input/index
    hwmon/index
    gpu/index
@@ -143,6 +145,7 @@ implementation.
    arm64/index
    ia64/index
    m68k/index
+   powerpc/index
    riscv/index
    s390/index
    sh/index
diff --git a/Documentation/input/conf.py b/Documentation/input/conf.py
deleted file mode 100644
index d2352fdc92ed..000000000000
--- a/Documentation/input/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux input driver subsystem"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'linux-input.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst
index ce9b99c004ae..61b2181ed3ea 100644
--- a/Documentation/kbuild/kbuild.rst
+++ b/Documentation/kbuild/kbuild.rst
@@ -38,12 +38,11 @@ Additional options to the assembler (for built-in and modules).
 
 AFLAGS_MODULE
 -------------
-Additional module specific options to use for $(AS).
+Additional assembler options for modules.
 
 AFLAGS_KERNEL
 -------------
-Additional options for $(AS) when used for assembler
-code for code that is compiled as built-in.
+Additional assembler options for built-in.
 
 KCFLAGS
 -------
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index f31158457753..f4f0f7ffde2b 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -328,7 +328,7 @@ more details, with real examples.
 	variable $(KBUILD_CFLAGS) and uses it for compilation flags for the
 	entire tree.
 
-	asflags-y specifies options for assembling with $(AS).
+	asflags-y specifies assembler options.
 
 	Example::
 
@@ -490,7 +490,7 @@ more details, with real examples.
 	as-instr checks if the assembler reports a specific instruction
 	and then outputs either option1 or option2
 	C escapes are supported in the test instruction
-	Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options
+	Note: as-instr-option uses KBUILD_AFLAGS for assembler options
 
     cc-option
 	cc-option is used to check if $(CC) supports a given option, and if
@@ -906,7 +906,7 @@ When kbuild executes, the following steps are followed (roughly):
 	vmlinux. The usage of $(call if_changed,xxx) will be described later.
 
     KBUILD_AFLAGS
-	$(AS) assembler flags
+	Assembler flags
 
 	Default value - see top level Makefile
 	Append or modify as required per architecture.
@@ -949,16 +949,16 @@ When kbuild executes, the following steps are followed (roughly):
 	to 'y' when selected.
 
     KBUILD_AFLAGS_KERNEL
-	$(AS) options specific for built-in
+	Assembler options specific for built-in
 
 	$(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile
 	resident kernel code.
 
     KBUILD_AFLAGS_MODULE
-	Options for $(AS) when building modules
+	Assembler options specific for modules
 
 	$(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
-	are used for $(AS).
+	are used for the assembler.
 
 	From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
 
diff --git a/Documentation/kernel-hacking/conf.py b/Documentation/kernel-hacking/conf.py
deleted file mode 100644
index 3d8acf0f33ad..000000000000
--- a/Documentation/kernel-hacking/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Kernel Hacking Guides"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'kernel-hacking.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/locking/spinlocks.rst b/Documentation/locking/spinlocks.rst
index 098107fb7d86..e93ec6645238 100644
--- a/Documentation/locking/spinlocks.rst
+++ b/Documentation/locking/spinlocks.rst
@@ -82,7 +82,7 @@ itself.  The read lock allows many concurrent readers.  Anything that
 **changes** the list will have to get the write lock.
 
    NOTE! RCU is better for list traversal, but requires careful
-   attention to design detail (see Documentation/RCU/listRCU.txt).
+   attention to design detail (see Documentation/RCU/listRCU.rst).
 
 Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
 time need to do any changes (even if you don't do it every time), you have
@@ -90,7 +90,7 @@ to get the write-lock at the very beginning.
 
    NOTE! We are working hard to remove reader-writer spinlocks in most
    cases, so please don't add a new one without consensus.  (Instead, see
-   Documentation/RCU/rcu.txt for complete information.)
+   Documentation/RCU/rcu.rst for complete information.)
 
 ----
 
diff --git a/Documentation/maintainer/conf.py b/Documentation/maintainer/conf.py
deleted file mode 100644
index 81e9eb7a7884..000000000000
--- a/Documentation/maintainer/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Development Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'maintainer.tex', 'Linux Kernel Development Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/media/conf.py b/Documentation/media/conf.py
deleted file mode 100644
index 1f194fcd2cae..000000000000
--- a/Documentation/media/conf.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-# SPDX-License-Identifier: GPL-2.0
-
-project = 'Linux Media Subsystem Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'media.tex', 'Linux Media Subsystem Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 045bb8148fe9..1adbb8a371c7 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -548,7 +548,7 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
 
 	[*] For information on bus mastering DMA and coherency please read:
 
-	    Documentation/PCI/pci.rst
+	    Documentation/driver-api/pci/pci.rst
 	    Documentation/DMA-API-HOWTO.txt
 	    Documentation/DMA-API.txt
 
diff --git a/Documentation/networking/conf.py b/Documentation/networking/conf.py
deleted file mode 100644
index 40f69e67a883..000000000000
--- a/Documentation/networking/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Networking Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'networking.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst
index 20415f21e48a..002e42745263 100644
--- a/Documentation/power/index.rst
+++ b/Documentation/power/index.rst
@@ -1,4 +1,4 @@
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
 
 ================
 Power Management
diff --git a/Documentation/powerpc/bootwrapper.txt b/Documentation/powerpc/bootwrapper.rst
index d60fced5e1cc..a6292afba573 100644
--- a/Documentation/powerpc/bootwrapper.txt
+++ b/Documentation/powerpc/bootwrapper.rst
@@ -1,5 +1,7 @@
+========================
 The PowerPC boot wrapper
-------------------------
+========================
+
 Copyright (C) Secret Lab Technologies Ltd.
 
 PowerPC image targets compresses and wraps the kernel image (vmlinux) with
@@ -21,6 +23,7 @@ it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
 image.  The details of the build system is discussed in the next section.
 Currently, the following image format targets exist:
 
+   ==================== ========================================================
    cuImage.%:		Backwards compatible uImage for older version of
 			U-Boot (for versions that don't understand the device
 			tree).  This image embeds a device tree blob inside
@@ -29,31 +32,36 @@ Currently, the following image format targets exist:
 			with boot wrapper code that extracts data from the old
 			bd_info structure and loads the data into the device
 			tree before jumping into the kernel.
-			  Because of the series of #ifdefs found in the
+
+			Because of the series of #ifdefs found in the
 			bd_info structure used in the old U-Boot interfaces,
 			cuImages are platform specific.  Each specific
 			U-Boot platform has a different platform init file
 			which populates the embedded device tree with data
 			from the platform specific bd_info file.  The platform
 			specific cuImage platform init code can be found in
-			arch/powerpc/boot/cuboot.*.c.  Selection of the correct
+			`arch/powerpc/boot/cuboot.*.c`. Selection of the correct
 			cuImage init code for a specific board can be found in
 			the wrapper structure.
+
    dtbImage.%:		Similar to zImage, except device tree blob is embedded
 			inside the image instead of provided by firmware.  The
 			output image file can be either an elf file or a flat
 			binary depending on the platform.
-			  dtbImages are used on systems which do not have an
+
+			dtbImages are used on systems which do not have an
 			interface for passing a device tree directly.
 			dtbImages are similar to simpleImages except that
 			dtbImages have platform specific code for extracting
 			data from the board firmware, but simpleImages do not
 			talk to the firmware at all.
-			  PlayStation 3 support uses dtbImage.  So do Embedded
+
+			PlayStation 3 support uses dtbImage.  So do Embedded
 			Planet boards using the PlanetCore firmware.  Board
 			specific initialization code is typically found in a
 			file named arch/powerpc/boot/<platform>.c; but this
 			can be overridden by the wrapper script.
+
    simpleImage.%:	Firmware independent compressed image that does not
 			depend on any particular firmware interface and embeds
 			a device tree blob.  This image is a flat binary that
@@ -61,14 +69,16 @@ Currently, the following image format targets exist:
 			Firmware cannot pass any configuration data to the
 			kernel with this image type and it depends entirely on
 			the embedded device tree for all information.
-			  The simpleImage is useful for booting systems with
+
+			The simpleImage is useful for booting systems with
 			an unknown firmware interface or for booting from
 			a debugger when no firmware is present (such as on
 			the Xilinx Virtex platform).  The only assumption that
 			simpleImage makes is that RAM is correctly initialized
 			and that the MMU is either off or has RAM mapped to
 			base address 0.
-			  simpleImage also supports inserting special platform
+
+			simpleImage also supports inserting special platform
 			specific initialization code to the start of the bootup
 			sequence.  The virtex405 platform uses this feature to
 			ensure that the cache is invalidated before caching
@@ -81,9 +91,11 @@ Currently, the following image format targets exist:
 			named (virtex405-<board>.dts).  Search the wrapper
 			script for 'virtex405' and see the file
 			arch/powerpc/boot/virtex405-head.S for details.
+
    treeImage.%;		Image format for used with OpenBIOS firmware found
 			on some ppc4xx hardware.  This image embeds a device
 			tree blob inside the image.
+
    uImage:		Native image format used by U-Boot.  The uImage target
 			does not add any boot code.  It just wraps a compressed
 			vmlinux in the uImage data structure.  This image
@@ -91,12 +103,14 @@ Currently, the following image format targets exist:
 			a device tree to the kernel at boot.  If using an older
 			version of U-Boot, then you need to use a cuImage
 			instead.
+
    zImage.%:		Image format which does not embed a device tree.
 			Used by OpenFirmware and other firmware interfaces
 			which are able to supply a device tree.  This image
 			expects firmware to provide the device tree at boot.
 			Typically, if you have general purpose PowerPC
 			hardware then you want this image format.
+   ==================== ========================================================
 
 Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
 and cuImage) all generate the device tree blob from a file in the
diff --git a/Documentation/powerpc/cpu_families.txt b/Documentation/powerpc/cpu_families.rst
index fc08e22feb1a..1e063c5440c3 100644
--- a/Documentation/powerpc/cpu_families.txt
+++ b/Documentation/powerpc/cpu_families.rst
@@ -1,3 +1,4 @@
+============
 CPU Families
 ============
 
@@ -8,8 +9,8 @@ and are supported by arch/powerpc.
 Book3S (aka sPAPR)
 ------------------
 
- - Hash MMU
- - Mix of 32 & 64 bit
+- Hash MMU
+- Mix of 32 & 64 bit::
 
    +--------------+                 +----------------+
    |  Old POWER   | --------------> | RS64 (threads) |
@@ -108,8 +109,8 @@ Book3S (aka sPAPR)
 IBM BookE
 ---------
 
- - Software loaded TLB.
- - All 32 bit
+- Software loaded TLB.
+- All 32 bit::
 
    +--------------+
    |     401      |
@@ -155,8 +156,8 @@ IBM BookE
 Motorola/Freescale 8xx
 ----------------------
 
- - Software loaded with hardware assist.
- - All 32 bit
+- Software loaded with hardware assist.
+- All 32 bit::
 
    +-------------+
    | MPC8xx Core |
@@ -166,9 +167,9 @@ Motorola/Freescale 8xx
 Freescale BookE
 ---------------
 
- - Software loaded TLB.
- - e6500 adds HW loaded indirect TLB entries.
- - Mix of 32 & 64 bit
+- Software loaded TLB.
+- e6500 adds HW loaded indirect TLB entries.
+- Mix of 32 & 64 bit::
 
    +--------------+
    |     e200     |
@@ -207,8 +208,8 @@ Freescale BookE
 IBM A2 core
 -----------
 
- - Book3E, software loaded TLB + HW loaded indirect TLB entries.
- - 64 bit
+- Book3E, software loaded TLB + HW loaded indirect TLB entries.
+- 64 bit::
 
    +--------------+     +----------------+
    |   A2 core    | --> |      WSP       |
diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.rst
index ae09df8722c8..b7bcdd2f41bb 100644
--- a/Documentation/powerpc/cpu_features.txt
+++ b/Documentation/powerpc/cpu_features.rst
@@ -1,3 +1,7 @@
+============
+CPU Features
+============
+
 Hollis Blanchard <hollis@austin.ibm.com>
 5 Jun 2002
 
@@ -32,7 +36,7 @@ anyways).
 After detecting the processor type, the kernel patches out sections of code
 that shouldn't be used by writing nop's over it. Using cpufeatures requires
 just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
-transfer_to_handler:
+transfer_to_handler::
 
 	#ifdef CONFIG_ALTIVEC
 	BEGIN_FTR_SECTION
diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.rst
index c5e8d5098ed3..920546d81326 100644
--- a/Documentation/powerpc/cxl.txt
+++ b/Documentation/powerpc/cxl.rst
@@ -1,3 +1,4 @@
+====================================
 Coherent Accelerator Interface (CXL)
 ====================================
 
@@ -21,6 +22,8 @@ Introduction
 Hardware overview
 =================
 
+    ::
+
          POWER8/9             FPGA
        +----------+        +---------+
        |          |        |         |
@@ -59,14 +62,16 @@ Hardware overview
     the fault. The context to which this fault is serviced is based on
     who owns that acceleration function.
 
-    POWER8 <-----> PSL Version 8 is compliant to the CAIA Version 1.0.
-    POWER9 <-----> PSL Version 9 is compliant to the CAIA Version 2.0.
+    - POWER8 and PSL Version 8 are compliant with the CAIA Version 1.0.
+    - POWER9 and PSL Version 9 are compliant with the CAIA Version 2.0.
+
     This PSL Version 9 provides new features such as:
+
     * Interaction with the nest MMU on the P9 chip.
     * Native DMA support.
     * Supports sending ASB_Notify messages for host thread wakeup.
     * Supports Atomic operations.
-    * ....
+    * etc.
 
     Cards with a PSL9 won't work on a POWER8 system and cards with a
     PSL8 won't work on a POWER9 system.
@@ -147,7 +152,9 @@ User API
     master devices.
 
     A userspace library libcxl is available here:
+
 	https://github.com/ibm-capi/libcxl
+
     This provides a C interface to this kernel API.
 
 open
@@ -165,7 +172,8 @@ open
     When all available contexts are allocated the open call will fail
     and return -ENOSPC.
 
-    Note: IRQs need to be allocated for each context, which may limit
+    Note:
+	  IRQs need to be allocated for each context, which may limit
           the number of contexts that can be created, and therefore
           how many times the device can be opened. The POWER8 CAPP
           supports 2040 IRQs and 3 are used by the kernel, so 2037 are
@@ -186,7 +194,9 @@ ioctl
         updated as userspace allocates and frees memory. This ioctl
         returns once the AFU context is started.
 
-        Takes a pointer to a struct cxl_ioctl_start_work:
+        Takes a pointer to a struct cxl_ioctl_start_work
+
+            ::
 
                 struct cxl_ioctl_start_work {
                         __u64 flags;
@@ -269,7 +279,7 @@ read
     The buffer passed to read() must be at least 4K bytes.
 
     The result of the read will be a buffer of one or more events,
-    each event is of type struct cxl_event, of varying size.
+    each event is of type struct cxl_event, of varying size::
 
             struct cxl_event {
                     struct cxl_event_header header;
@@ -280,7 +290,9 @@ read
                     };
             };
 
-    The struct cxl_event_header is defined as:
+    The struct cxl_event_header is defined as
+
+        ::
 
             struct cxl_event_header {
                     __u16 type;
@@ -307,7 +319,9 @@ read
             For future extensions and padding.
 
     If the event type is CXL_EVENT_AFU_INTERRUPT then the event
-    structure is defined as:
+    structure is defined as
+
+        ::
 
             struct cxl_event_afu_interrupt {
                     __u16 flags;
@@ -326,7 +340,9 @@ read
             For future extensions and padding.
 
     If the event type is CXL_EVENT_DATA_STORAGE then the event
-    structure is defined as:
+    structure is defined as
+
+        ::
 
             struct cxl_event_data_storage {
                     __u16 flags;
@@ -356,7 +372,9 @@ read
             For future extensions
 
     If the event type is CXL_EVENT_AFU_ERROR then the event structure
-    is defined as:
+    is defined as
+
+        ::
 
             struct cxl_event_afu_error {
                     __u16 flags;
@@ -393,15 +411,15 @@ open
 ioctl
 -----
 
-CXL_IOCTL_DOWNLOAD_IMAGE:
-CXL_IOCTL_VALIDATE_IMAGE:
+CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE:
     Starts and controls flashing a new FPGA image. Partial
     reconfiguration is not supported (yet), so the image must contain
     a copy of the PSL and AFU(s). Since an image can be quite large,
     the caller may have to iterate, splitting the image in smaller
     chunks.
 
-    Takes a pointer to a struct cxl_adapter_image:
+    Takes a pointer to a struct cxl_adapter_image::
+
         struct cxl_adapter_image {
             __u64 flags;
             __u64 data;
@@ -442,7 +460,7 @@ Udev rules
     The following udev rules could be used to create a symlink to the
     most logical chardev to use in any programming mode (afuX.Yd for
     dedicated, afuX.Ys for afu directed), since the API is virtually
-    identical for each:
+    identical for each::
 
 	SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
 	SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
diff --git a/Documentation/powerpc/cxlflash.txt b/Documentation/powerpc/cxlflash.rst
index a64bdaa0a1cf..cea67931b3b9 100644
--- a/Documentation/powerpc/cxlflash.txt
+++ b/Documentation/powerpc/cxlflash.rst
@@ -1,3 +1,7 @@
+================================
+Coherent Accelerator (CXL) Flash
+================================
+
 Introduction
 ============
 
@@ -28,7 +32,7 @@ Introduction
     responsible for the initialization of the adapter, setting up the
     special path for user space access, and performing error recovery. It
     communicates directly the Flash Accelerator Functional Unit (AFU)
-    as described in Documentation/powerpc/cxl.txt.
+    as described in Documentation/powerpc/cxl.rst.
 
     The cxlflash driver supports two, mutually exclusive, modes of
     operation at the device (LUN) level:
@@ -58,7 +62,7 @@ Overview
 
     The CXL Flash Adapter Driver establishes a master context with the
     AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
-    Adapter Problem Space Memory Map looks like this:
+    Adapter Problem Space Memory Map looks like this::
 
                      +-------------------------------+
                      |    512 * 64 KB User MMIO      |
@@ -375,7 +379,7 @@ CXL Flash Driver Host IOCTLs
     Each host adapter instance that is supported by the cxlflash driver
     has a special character device associated with it to enable a set of
     host management function. These character devices are hosted in a
-    class dedicated for cxlflash and can be accessed via /dev/cxlflash/*.
+    class dedicated for cxlflash and can be accessed via ``/dev/cxlflash/*``.
 
     Applications can be written to perform various functions using the
     host ioctl APIs below.
diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/dawr-power9.rst
index ecdbb076438c..c96ab6befd9c 100644
--- a/Documentation/powerpc/DAWR-POWER9.txt
+++ b/Documentation/powerpc/dawr-power9.rst
@@ -1,10 +1,11 @@
+=====================
 DAWR issues on POWER9
-============================
+=====================
 
 On POWER9 the Data Address Watchpoint Register (DAWR) can cause a checkstop
 if it points to cache inhibited (CI) memory. Currently Linux has no way to
 disinguish CI memory when configuring the DAWR, so (for now) the DAWR is
-disabled by this commit:
+disabled by this commit::
 
     commit 9654153158d3e0684a1bdb76dbababdb7111d5a0
     Author: Michael Neuling <mikey@neuling.org>
@@ -12,7 +13,7 @@ disabled by this commit:
     powerpc: Disable DAWR in the base POWER9 CPU features
 
 Technical Details:
-============================
+==================
 
 DAWR has 6 different ways of being set.
 1) ptrace
@@ -37,7 +38,7 @@ DAWR on the migration.
 For xmon, the 'bd' command will return an error on P9.
 
 Consequences for users
-============================
+======================
 
 For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB
 will accept the command. Unfortunately since there is no hardware
@@ -57,8 +58,8 @@ trapped in GDB. The watchpoint is remembered, so if the guest is
 migrated back to the POWER8 host, it will start working again.
 
 Force enabling the DAWR
-=============================
-Kernels (since ~v5.2) have an option to force enable the DAWR via:
+=======================
+Kernels (since ~v5.2) have an option to force enable the DAWR via::
 
   echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
 
@@ -86,5 +87,7 @@ dawr_enable_dangerous file will fail if the hypervisor doesn't support
 writing the DAWR.
 
 To double check the DAWR is working, run this kernel selftest:
+
   tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+
 Any errors/failures/skips mean something is wrong.
diff --git a/Documentation/powerpc/dscr.txt b/Documentation/powerpc/dscr.rst
index ece300c64f76..2ab99006014c 100644
--- a/Documentation/powerpc/dscr.txt
+++ b/Documentation/powerpc/dscr.rst
@@ -1,5 +1,6 @@
-			DSCR (Data Stream Control Register)
-		================================================
+===================================
+DSCR (Data Stream Control Register)
+===================================
 
 DSCR register in powerpc allows user to have some control of prefetch of data
 stream in the processor. Please refer to the ISA documents or related manual
@@ -10,14 +11,17 @@ user interface.
 
 (A) Data Structures:
 
-	(1) thread_struct:
+	(1) thread_struct::
+
 		dscr		/* Thread DSCR value */
 		dscr_inherit	/* Thread has changed default DSCR */
 
-	(2) PACA:
+	(2) PACA::
+
 		dscr_default	/* per-CPU DSCR default value */
 
-	(3) sysfs.c:
+	(3) sysfs.c::
+
 		dscr_default	/* System DSCR default value */
 
 (B) Scheduler Changes:
@@ -35,8 +39,8 @@ user interface.
 
 (C) SYSFS Interface:
 
-	Global DSCR default:		/sys/devices/system/cpu/dscr_default
-	CPU specific DSCR default:	/sys/devices/system/cpu/cpuN/dscr
+	- Global DSCR default:		/sys/devices/system/cpu/dscr_default
+	- CPU specific DSCR default:	/sys/devices/system/cpu/cpuN/dscr
 
 	Changing the global DSCR default in the sysfs will change all the CPU
 	specific DSCR defaults immediately in their PACA structures. Again if
diff --git a/Documentation/powerpc/eeh-pci-error-recovery.txt b/Documentation/powerpc/eeh-pci-error-recovery.rst
index 678189280bb4..438a87ebc095 100644
--- a/Documentation/powerpc/eeh-pci-error-recovery.txt
+++ b/Documentation/powerpc/eeh-pci-error-recovery.rst
@@ -1,10 +1,10 @@
+==========================
+PCI Bus EEH Error Recovery
+==========================
 
+Linas Vepstas <linas@austin.ibm.com>
 
-                      PCI Bus EEH Error Recovery
-                      --------------------------
-                           Linas Vepstas
-                       <linas@austin.ibm.com>
-                          12 January 2005
+12 January 2005
 
 
 Overview:
@@ -143,17 +143,17 @@ seen in /proc/ppc64/eeh (subject to change).  Normally, almost
 all of these occur during boot, when the PCI bus is scanned, where
 a large number of 0xff reads are part of the bus scan procedure.
 
-If a frozen slot is detected, code in 
-arch/powerpc/platforms/pseries/eeh.c will print a stack trace to 
-syslog (/var/log/messages).  This stack trace has proven to be very 
-useful to device-driver authors for finding out at what point the EEH 
-error was detected, as the error itself usually occurs slightly 
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages).  This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
 beforehand.
 
 Next, it uses the Linux kernel notifier chain/work queue mechanism to
 allow any interested parties to find out about the failure.  Device
 drivers, or other parts of the kernel, can use
-eeh_register_notifier(struct notifier_block *) to find out about EEH
+``eeh_register_notifier(struct notifier_block *)`` to find out about EEH
 events.  The event will include a pointer to the pci device, the
 device node and some state info.  Receivers of the event can "do as
 they wish"; the default handler will be described further in this
@@ -162,10 +162,13 @@ section.
 To assist in the recovery of the device, eeh.c exports the
 following functions:
 
-rtas_set_slot_reset() -- assert the  PCI #RST line for 1/8th of a second
-rtas_configure_bridge() -- ask firmware to configure any PCI bridges
+rtas_set_slot_reset()
+   assert the PCI #RST line for 1/8th of a second
+rtas_configure_bridge()
+   ask firmware to configure any PCI bridges
    located topologically under the pci slot.
-eeh_save_bars() and eeh_restore_bars(): save and restore the PCI
+eeh_save_bars() and eeh_restore_bars()
+   save and restore the PCI
    config-space info for a device and any devices under it.
 
 
@@ -191,7 +194,7 @@ events get delivered to user-space scripts.
 
 Following is an example sequence of events that cause a device driver
 close function to be called during the first phase of an EEH reset.
-The following sequence is an example of the pcnet32 device driver.
+The following sequence is an example of the pcnet32 device driver::
 
     rpa_php_unconfig_pci_adapter (struct slot *)  // in rpaphp_pci.c
     {
@@ -241,53 +244,54 @@ The following sequence is an example of the pcnet32 device driver.
      }}}}}}
 
 
-    in drivers/pci/pci_driver.c,
-    struct device_driver->remove() is just pci_device_remove()
-    which calls struct pci_driver->remove() which is pcnet32_remove_one()
-    which calls unregister_netdev()  (in net/core/dev.c)
-    which calls dev_close()  (in net/core/dev.c)
-    which calls dev->stop() which is pcnet32_close()
-    which then does the appropriate shutdown.
+in drivers/pci/pci_driver.c,
+struct device_driver->remove() is just pci_device_remove()
+which calls struct pci_driver->remove() which is pcnet32_remove_one()
+which calls unregister_netdev()  (in net/core/dev.c)
+which calls dev_close()  (in net/core/dev.c)
+which calls dev->stop() which is pcnet32_close()
+which then does the appropriate shutdown.
 
 ---
+
 Following is the analogous stack trace for events sent to user-space
-when the pci device is unconfigured.
+when the pci device is unconfigured::
 
-rpa_php_unconfig_pci_adapter() {             // in rpaphp_pci.c
-  calls
-  pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
+  rpa_php_unconfig_pci_adapter() {             // in rpaphp_pci.c
     calls
-    pci_destroy_dev (struct pci_dev *) {
+    pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
       calls
-      device_unregister (&dev->dev) {        // in /drivers/base/core.c
+      pci_destroy_dev (struct pci_dev *) {
         calls
-        device_del(struct device * dev) {    // in /drivers/base/core.c
+        device_unregister (&dev->dev) {        // in /drivers/base/core.c
           calls
-          kobject_del() {                    //in /libs/kobject.c
+          device_del(struct device * dev) {    // in /drivers/base/core.c
             calls
-            kobject_uevent() {               // in /libs/kobject.c
+            kobject_del() {                    // in /lib/kobject.c
               calls
-              kset_uevent() {                // in /lib/kobject.c
+              kobject_uevent() {               // in /lib/kobject.c
                 calls
-                kset->uevent_ops->uevent()   // which is really just
-                a call to
-                dev_uevent() {               // in /drivers/base/core.c
+                kset_uevent() {                // in /lib/kobject.c
                   calls
-                  dev->bus->uevent() which is really just a call to
-                  pci_uevent () {            // in drivers/pci/hotplug.c
-                    which prints device name, etc....
+                  kset->uevent_ops->uevent()   // which is really just
+                  a call to
+                  dev_uevent() {               // in /drivers/base/core.c
+                    calls
+                    dev->bus->uevent() which is really just a call to
+                    pci_uevent () {            // in drivers/pci/hotplug.c
+                      which prints device name, etc....
+                   }
                  }
-               }
-               then kobject_uevent() sends a netlink uevent to userspace
-               --> userspace uevent
-               (during early boot, nobody listens to netlink events and
-               kobject_uevent() executes uevent_helper[], which runs the
-               event process /sbin/hotplug)
+                 then kobject_uevent() sends a netlink uevent to userspace
+                 --> userspace uevent
+                 (during early boot, nobody listens to netlink events and
+                 kobject_uevent() executes uevent_helper[], which runs the
+                 event process /sbin/hotplug)
+             }
            }
-         }
-         kobject_del() then calls sysfs_remove_dir(), which would
-         trigger any user-space daemon that was watching /sysfs,
-         and notice the delete event.
+           kobject_del() then calls sysfs_remove_dir(), which would
+           trigger any user-space daemon that was watching /sysfs,
+           and notice the delete event.
 
 
 Pro's and Con's of the Current Design
@@ -299,12 +303,12 @@ individual device drivers, so that the current design throws a wide net.
 The biggest negative of the design is that it potentially disturbs
 network daemons and file systems that didn't need to be disturbed.
 
--- A minor complaint is that resetting the network card causes
+-  A minor complaint is that resetting the network card causes
    user-space back-to-back ifdown/ifup burps that potentially disturb
    network daemons, that didn't need to even know that the pci
    card was being rebooted.
 
--- A more serious concern is that the same reset, for SCSI devices,
+-  A more serious concern is that the same reset, for SCSI devices,
    causes havoc to mounted file systems.  Scripts cannot post-facto
    unmount a file system without flushing pending buffers, but this
    is impossible, because I/O has already been stopped.  Thus,
@@ -322,7 +326,7 @@ network daemons and file systems that didn't need to be disturbed.
    from the block layer.  It would be very natural to add an EEH
    reset into this chain of events.
 
--- If a SCSI error occurs for the root device, all is lost unless
+-  If a SCSI error occurs for the root device, all is lost unless
    the sysadmin had the foresight to run /bin, /sbin, /etc, /var
    and so on, out of ramdisk/tmpfs.
 
@@ -330,5 +334,3 @@ network daemons and file systems that didn't need to be disturbed.
 Conclusions
 -----------
 There's forward progress ...
-
-
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.rst
index 10e7f4d16c14..9ca12830a48e 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.rst
@@ -1,7 +1,8 @@
+======================
+Firmware-Assisted Dump
+======================
 
-                   Firmware-Assisted Dump
-                   ------------------------
-                       July 2011
+July 2011
 
 The goal of firmware-assisted dump is to enable the dump of
 a crashed system, and to do so from a fully-reset system, and
@@ -27,11 +28,11 @@ in production use.
 Comparing with kdump or other strategies, firmware-assisted
 dump offers several strong, practical advantages:
 
--- Unlike kdump, the system has been reset, and loaded
+-  Unlike kdump, the system has been reset, and loaded
    with a fresh copy of the kernel.  In particular,
    PCI and I/O devices have been reinitialized and are
    in a clean, consistent state.
--- Once the dump is copied out, the memory that held the dump
+-  Once the dump is copied out, the memory that held the dump
    is immediately available to the running kernel. And therefore,
    unlike kdump, fadump doesn't need a 2nd reboot to get back
    the system to the production configuration.
@@ -40,17 +41,18 @@ The above can only be accomplished by coordination with,
 and assistance from the Power firmware. The procedure is
 as follows:
 
--- The first kernel registers the sections of memory with the
+-  The first kernel registers the sections of memory with the
    Power firmware for dump preservation during OS initialization.
    These registered sections of memory are reserved by the first
    kernel during early boot.
 
--- When a system crashes, the Power firmware will save
+-  When a system crashes, the Power firmware will save
    the low memory (boot memory of size larger of 5% of system RAM
    or 256MB) of RAM to the previous registered region. It will
    also save system registers, and hardware PTE's.
 
-   NOTE: The term 'boot memory' means size of the low memory chunk
+   NOTE:
+         The term 'boot memory' means size of the low memory chunk
          that is required for a kernel to boot successfully when
          booted with restricted memory. By default, the boot memory
          size will be the larger of 5% of system RAM or 256MB.
@@ -64,12 +66,12 @@ as follows:
          as fadump uses a predefined offset to reserve memory
          for boot memory dump preservation in case of a crash.
 
--- After the low memory (boot memory) area has been saved, the
+-  After the low memory (boot memory) area has been saved, the
    firmware will reset PCI and other hardware state.  It will
    *not* clear the RAM. It will then launch the bootloader, as
    normal.
 
--- The freshly booted kernel will notice that there is a new
+-  The freshly booted kernel will notice that there is a new
    node (ibm,dump-kernel) in the device tree, indicating that
    there is crash data available from a previous boot. During
    the early boot OS will reserve rest of the memory above
@@ -77,17 +79,18 @@ as follows:
    size. This will make sure that the second kernel will not
    touch any of the dump memory area.
 
--- User-space tools will read /proc/vmcore to obtain the contents
+-  User-space tools will read /proc/vmcore to obtain the contents
    of memory, which holds the previous crashed kernel dump in ELF
    format. The userspace tools may copy this info to disk, or
    network, nas, san, iscsi, etc. as desired.
 
--- Once the userspace tool is done saving dump, it will echo
+-  Once the userspace tool is done saving dump, it will echo
    '1' to /sys/kernel/fadump_release_mem to release the reserved
    memory back to general use, except the memory required for
    next firmware-assisted dump registration.
 
-   e.g.
+   e.g.::
+
      # echo 1 > /sys/kernel/fadump_release_mem
 
 Please note that the firmware-assisted dump feature
@@ -95,7 +98,7 @@ is only available on Power6 and above systems with recent
 firmware versions.
 
 Implementation details:
-----------------------
+-----------------------
 
 During boot, a check is made to see if firmware supports
 this feature on that particular machine. If it does, then
@@ -121,7 +124,7 @@ Allocator (CMA) for memory reservation if CMA is configured for kernel.
 With CMA reservation this memory will be available for applications to
 use it, while kernel is prevented from using it. With this fadump will
 still be able to capture all of the kernel memory and most of the user
-space memory except the user pages that were present in CMA region.
+space memory except the user pages that were present in CMA region::
 
   o Memory Reservation during first kernel
 
@@ -166,7 +169,7 @@ The tools to examine the dump will be same as the ones
 used for kdump.
 
 How to enable firmware-assisted dump (fadump):
--------------------------------------
+----------------------------------------------
 
 1. Set config option CONFIG_FA_DUMP=y and build kernel.
 2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
@@ -177,19 +180,20 @@ How to enable firmware-assisted dump (fadump):
    to specify size of the memory to reserve for boot memory dump
    preservation.
 
-NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
-         use 'crashkernel=' to specify size of the memory to reserve
-         for boot memory dump preservation.
-      2. If firmware-assisted dump fails to reserve memory then it
-         will fallback to existing kdump mechanism if 'crashkernel='
-         option is set at kernel cmdline.
-      3. if user wants to capture all of user space memory and ok with
-         reserved memory not available to production system, then
-         'fadump=nocma' kernel parameter can be used to fallback to
-         old behaviour.
+NOTE:
+     1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+        use 'crashkernel=' to specify size of the memory to reserve
+        for boot memory dump preservation.
+     2. If firmware-assisted dump fails to reserve memory then it
+        will fallback to existing kdump mechanism if 'crashkernel='
+        option is set at kernel cmdline.
+     3. If the user wants to capture all of user space memory and is OK with
+        reserved memory not available to production system, then
+        'fadump=nocma' kernel parameter can be used to fallback to
+        old behaviour.
 
 Sysfs/debugfs files:
-------------
+--------------------
 
 Firmware-assisted dump feature uses sysfs file system to hold
 the control files and debugfs file to display memory reserved region.
@@ -197,20 +201,20 @@ the control files and debugfs file to display memory reserved region.
 Here is the list of files under kernel sysfs:
 
  /sys/kernel/fadump_enabled
-
     This is used to display the fadump status.
-    0 = fadump is disabled
-    1 = fadump is enabled
+
+    - 0 = fadump is disabled
+    - 1 = fadump is enabled
 
     This interface can be used by kdump init scripts to identify if
     fadump is enabled in the kernel and act accordingly.
 
  /sys/kernel/fadump_registered
-
     This is used to display the fadump registration status as well
     as to control (start/stop) the fadump registration.
-    0 = fadump is not registered.
-    1 = fadump is registered and ready to handle system crash.
+
+    - 0 = fadump is not registered.
+    - 1 = fadump is registered and ready to handle system crash.
 
     To register fadump echo 1 > /sys/kernel/fadump_registered and
     echo 0 > /sys/kernel/fadump_registered for un-register and stop the
@@ -219,13 +223,12 @@ Here is the list of files under kernel sysfs:
     easily integrated with kdump service start/stop.
 
  /sys/kernel/fadump_release_mem
-
     This file is available only when fadump is active during
     second kernel. This is used to release the reserved memory
     region that are held for saving crash dump. To release the
-    reserved memory echo 1 to it:
+    reserved memory echo 1 to it::
 
-    echo 1  > /sys/kernel/fadump_release_mem
+	echo 1  > /sys/kernel/fadump_release_mem
 
     After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
     file will change to reflect the new memory reservations.
@@ -238,38 +241,39 @@ Here is the list of files under powerpc debugfs:
 (Assuming debugfs is mounted on /sys/kernel/debug directory.)
 
  /sys/kernel/debug/powerpc/fadump_region
-
     This file shows the reserved memory regions if fadump is
     enabled otherwise this file is empty. The output format
-    is:
-    <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
+    is::
+
+      <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
 
     e.g.
-    Contents when fadump is registered during first kernel
+    Contents when fadump is registered during first kernel::
 
-    # cat /sys/kernel/debug/powerpc/fadump_region
-    CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
-    HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
-    DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
+      # cat /sys/kernel/debug/powerpc/fadump_region
+      CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
+      HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
+      DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
 
-    Contents when fadump is active during second kernel
+    Contents when fadump is active during second kernel::
 
-    # cat /sys/kernel/debug/powerpc/fadump_region
-    CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
-    HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
-    DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
-        : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
+      # cat /sys/kernel/debug/powerpc/fadump_region
+      CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
+      HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
+      DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
+          : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
 
-NOTE: Please refer to Documentation/filesystems/debugfs.txt on
+NOTE:
+      Please refer to Documentation/filesystems/debugfs.txt on
       how to mount the debugfs filesystem.
 
 
 TODO:
 -----
- o Need to come up with the better approach to find out more
+ - Need to come up with a better approach to find out more
    accurate boot memory size that is required for a kernel to
    boot successfully when booted with restricted memory.
- o The fadump implementation introduces a fadump crash info structure
+ - The fadump implementation introduces a fadump crash info structure
    in the scratch area before the ELF core header. The idea of introducing
    this structure is to pass some important crash info data to the second
    kernel which will help second kernel to populate ELF core header with
@@ -277,7 +281,9 @@ TODO:
    design implementation does not address a possibility of introducing
    additional fields (in future) to this structure without affecting
    compatibility. Need to come up with the better approach to address this.
+
    The possible approaches are:
+
 	1. Introduce version field for version tracking, bump up the version
 	whenever a new field is added to the structure in future. The version
 	field can be used to find out what fields are valid for the current
@@ -285,8 +291,10 @@ TODO:
 	2. Reserve the area of predefined size (say PAGE_SIZE) for this
 	structure and have unused area as reserved (initialized to zero)
 	for future field additions.
+
    The advantage of approach 1 over 2 is we don't need to reserve extra space.
----
+
 Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+
 This document is based on the original documentation written for phyp
 assisted dump by Linas Vepstas and Manish Ahuja.
diff --git a/Documentation/powerpc/hvcs.txt b/Documentation/powerpc/hvcs.rst
index a730ca5a07f8..6808acde672f 100644
--- a/Documentation/powerpc/hvcs.txt
+++ b/Documentation/powerpc/hvcs.rst
@@ -1,19 +1,22 @@
-===========================================================================
-				   HVCS
-	IBM "Hypervisor Virtual Console Server" Installation Guide
-			  for Linux Kernel 2.6.4+
-		    Copyright (C) 2004 IBM Corporation
+===============================================================
+HVCS IBM "Hypervisor Virtual Console Server" Installation Guide
+===============================================================
 
-===========================================================================
-NOTE:Eight space tabs are the optimum editor setting for reading this file.
-===========================================================================
+for Linux Kernel 2.6.4+
 
-	       Author(s) :  Ryan S. Arnold <rsa@us.ibm.com>
-		       Date Created: March, 02, 2004
-		       Last Changed: August, 24, 2004
+Copyright (C) 2004 IBM Corporation
 
----------------------------------------------------------------------------
-Table of contents:
+.. ===========================================================================
+.. NOTE:Eight space tabs are the optimum editor setting for reading this file.
+.. ===========================================================================
+
+
+Author(s): Ryan S. Arnold <rsa@us.ibm.com>
+
+| Date Created: March, 02, 2004
+| Last Changed: August, 24, 2004
+
+.. Table of contents:
 
 	1.  Driver Introduction:
 	2.  System Requirements
@@ -27,8 +30,8 @@ Table of contents:
 	8.  Questions & Answers:
 	9.  Reporting Bugs:
 
----------------------------------------------------------------------------
 1. Driver Introduction:
+=======================
 
 This is the device driver for the IBM Hypervisor Virtual Console Server,
 "hvcs".  The IBM hvcs provides a tty driver interface to allow Linux user
@@ -38,8 +41,8 @@ ppc64 system.  Physical hardware consoles per partition are not practical
 on this hardware so system consoles are accessed by this driver using
 firmware interfaces to virtual terminal devices.
 
----------------------------------------------------------------------------
 2. System Requirements:
+=======================
 
 This device driver was written using 2.6.4 Linux kernel APIs and will only
 build and run on kernels of this version or later.
@@ -52,8 +55,8 @@ Sysfs must be mounted on the system so that the user can determine which
 major and minor numbers are associated with each vty-server.  Directions
 for sysfs mounting are outside the scope of this document.
 
----------------------------------------------------------------------------
 3. Build Options:
+=================
 
 The hvcs driver registers itself as a tty driver.  The tty layer
 dynamically allocates a block of major and minor numbers in a quantity
@@ -65,11 +68,11 @@ If the default number of device entries is adequate then this driver can be
 built into the kernel.  If not, the default can be over-ridden by inserting
 the driver as a module with insmod parameters.
 
----------------------------------------------------------------------------
 3.1 Built-in:
+-------------
 
 The following menuconfig example demonstrates selecting to build this
-driver into the kernel.
+driver into the kernel::
 
 	Device Drivers  --->
 		Character devices  --->
@@ -77,11 +80,11 @@ driver into the kernel.
 
 Begin the kernel make process.
 
----------------------------------------------------------------------------
 3.2 Module:
+-----------
 
 The following menuconfig example demonstrates selecting to build this
-driver as a kernel module.
+driver as a kernel module::
 
 	Device Drivers  --->
 		Character devices  --->
@@ -89,11 +92,11 @@ driver as a kernel module.
 
 The make process will build the following kernel modules:
 
-	hvcs.ko
-	hvcserver.ko
+	- hvcs.ko
+	- hvcserver.ko
 
 To insert the module with the default allocation execute the following
-commands in the order they appear:
+commands in the order they appear::
 
 	insmod hvcserver.ko
 	insmod hvcs.ko
@@ -103,7 +106,7 @@ be inserted first, otherwise the hvcs module will not find some of the
 symbols it expects.
 
 To override the default use an insmod parameter as follows (requesting 4
-tty devices as an example):
+tty devices as an example)::
 
 	insmod hvcs.ko hvcs_parm_num_devs=4
 
@@ -115,31 +118,31 @@ source file before building.
 NOTE: The length of time it takes to insmod the driver seems to be related
 to the number of tty interfaces the registering driver requests.
 
-In order to remove the driver module execute the following command:
+In order to remove the driver module execute the following command::
 
 	rmmod hvcs.ko
 
 The recommended method for installing hvcs as a module is to use depmod to
 build a current modules.dep file in /lib/modules/`uname -r` and then
-execute:
+execute::
 
-modprobe hvcs hvcs_parm_num_devs=4
+	modprobe hvcs hvcs_parm_num_devs=4
 
 The modules.dep file indicates that hvcserver.ko needs to be inserted
 before hvcs.ko and modprobe uses this file to smartly insert the modules in
 the proper order.
 
 The following modprobe command is used to remove hvcs and hvcserver in the
-proper order:
+proper order::
 
-modprobe -r hvcs
+	modprobe -r hvcs
 
----------------------------------------------------------------------------
 4. Installation:
+================
 
 The tty layer creates sysfs entries which contain the major and minor
 numbers allocated for the hvcs driver.  The following snippet of "tree"
-output of the sysfs directory shows where these numbers are presented:
+output of the sysfs directory shows where these numbers are presented::
 
 	sys/
 	|-- *other sysfs base dirs*
@@ -164,7 +167,7 @@ output of the sysfs directory shows where these numbers are presented:
 	|-- *other sysfs base dirs*
 
 For the above examples the following output is a result of cat'ing the
-"dev" entry in the hvcs directory:
+"dev" entry in the hvcs directory::
 
 	Pow5:/sys/class/tty/hvcs0/ # cat dev
 	254:0
@@ -184,7 +187,7 @@ systems running hvcs will already have the device entries created or udev
 will do it automatically.
 
 Given the example output above, to manually create a /dev/hvcs* node entry
-mknod can be used as follows:
+mknod can be used as follows::
 
 	mknod /dev/hvcs0 c 254 0
 	mknod /dev/hvcs1 c 254 1
@@ -195,15 +198,15 @@ Using mknod to manually create the device entries makes these device nodes
 persistent.  Once created they will exist prior to the driver insmod.
 
 Attempting to connect an application to /dev/hvcs* prior to insertion of
-the hvcs module will result in an error message similar to the following:
+the hvcs module will result in an error message similar to the following::
 
 	"/dev/hvcs*: No such device".
 
 NOTE: Just because there is a device node present doesn't mean that there
 is a vty-server device configured for that node.
 
----------------------------------------------------------------------------
 5. Connection
+=============
 
 Since this driver controls devices that provide a tty interface a user can
 interact with the device node entries using any standard tty-interactive
@@ -249,7 +252,7 @@ vty-server adapter is associated with which /dev/hvcs* node a special sysfs
 attribute has been added to each vty-server sysfs entry.  This entry is
 called "index" and showing it reveals an integer that refers to the
 /dev/hvcs* entry to use to connect to that device.  For instance cating the
-index attribute of vty-server adapter 30000004 shows the following.
+index attribute of vty-server adapter 30000004 shows the following::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
 	2
@@ -262,8 +265,8 @@ system the /dev/hvcs* entry that interacts with a particular vty-server
 adapter is not guaranteed to remain the same across system reboots.  Look
 in the Q & A section for more on this issue.
 
----------------------------------------------------------------------------
 6. Disconnection
+================
 
 As a security feature to prevent the delivery of stale data to an
 unintended target the Power5 system firmware disables the fetching of data
@@ -305,7 +308,7 @@ connection between the vty-server and target vty ONLY if the vterm_state
 previously read '1'.  The write directive is ignored if the vterm_state
 read '0' or if any value other than '0' was written to the vterm_state
 attribute.  The following example will show the method used for verifying
-the vty-server connection status and disconnecting a vty-server connection.
+the vty-server connection status and disconnecting a vty-server connection::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
 	1
@@ -318,12 +321,12 @@ the vty-server connection status and disconnecting a vty-server connection.
 All vty-server connections are automatically terminated when the device is
 hotplug removed and when the module is removed.
 
----------------------------------------------------------------------------
 7. Configuration
+================
 
 Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
 is symlinked in several other sysfs tree directories, notably under the
-hvcs driver entry, which looks like the following example:
+hvcs driver entry, which looks like the following example::
 
 	Pow5:/sys/bus/vio/drivers/hvcs # ls
 	.  ..  30000003  30000004  rescan
@@ -344,7 +347,7 @@ completed or was never executed.
 
 Vty-server entries in this directory are a 32 bit partition unique unit
 address that is created by firmware.  An example vty-server sysfs entry
-looks like the following:
+looks like the following::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
 	.   current_vty   devspec       name          partner_vtys
@@ -352,21 +355,21 @@ looks like the following:
 
 Each entry is provided, by default with a "name" attribute.  Reading the
 "name" attribute will reveal the device type as shown in the following
-example:
+example::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
 	vty-server
 
 Each entry is also provided, by default, with a "devspec" attribute which
 reveals the full device specification when read, as shown in the following
-example:
+example::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
 	/vdevice/vty-server@30000004
 
 Each vty-server sysfs dir is provided with two read-only attributes that
 provide lists of easily parsed partner vty data: "partner_vtys" and
-"partner_clcs".
+"partner_clcs"::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
 	30000000
@@ -396,7 +399,7 @@ A vty-server can only be connected to a single vty at a time.  The entry,
 read.
 
 The current_vty can be changed by writing a valid partner clc to the entry
-as in the following example:
+as in the following example::
 
 	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
 	8A-V4-C0 > current_vty
@@ -408,9 +411,9 @@ currently open connection is freed.
 Information on the "vterm_state" attribute was covered earlier on the
 chapter entitled "disconnection".
 
----------------------------------------------------------------------------
 8. Questions & Answers:
-===========================================================================
+=======================
+
 Q: What are the security concerns involving hvcs?
 
 A: There are three main security concerns:
@@ -429,6 +432,7 @@ A: There are three main security concerns:
 	partition) will experience the previously logged in session.
 
 ---------------------------------------------------------------------------
+
 Q: How do I multiplex a console that I grab through hvcs so that other
 people can see it:
 
@@ -440,6 +444,7 @@ term type "screen" to others.  This means that curses based programs may
 not display properly in screen sessions.
 
 ---------------------------------------------------------------------------
+
 Q: Why are the colors all messed up?
 Q: Why are the control characters acting strange or not working?
 Q: Why is the console output all strange and unintelligible?
@@ -455,6 +460,7 @@ disconnect from the console.  This will ensure that the next user gets
 their own TERM type set when they login.
 
 ---------------------------------------------------------------------------
+
 Q: When I try to CONNECT kermit to an hvcs device I get:
 "Sorry, can't open connection: /dev/hvcs*"What is happening?
 
@@ -490,6 +496,7 @@ A: There is not a corresponding vty-server device that maps to an existing
 /dev/hvcs* entry.
 
 ---------------------------------------------------------------------------
+
 Q: When I try to CONNECT kermit to an hvcs device I get:
 "Sorry, write access to UUCP lockfile directory denied."
 
@@ -497,6 +504,7 @@ A: The /dev/hvcs* entry you have specified doesn't exist where you said it
 does?  Maybe you haven't inserted the module (on systems with udev).
 
 ---------------------------------------------------------------------------
+
 Q: If I already have one Linux partition installed can I use hvcs on said
 partition to provide the console for the install of a second Linux
 partition?
@@ -505,6 +513,7 @@ A: Yes granted that your are connected to the /dev/hvcs* device using
 kermit or cu or some other program that doesn't provide terminal emulation.
 
 ---------------------------------------------------------------------------
+
 Q: Can I connect to more than one partition's console at a time using this
 driver?
 
@@ -512,6 +521,7 @@ A: Yes.  Of course this means that there must be more than one vty-server
 configured for this partition and each must point to a disconnected vty.
 
 ---------------------------------------------------------------------------
+
 Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
 
 A: Yes, if you have dlpar and hotplug enabled for your system and it has
@@ -519,6 +529,7 @@ been built into the kernel the hvcs drivers is configured to dynamically
 handle additions of new devices and removals of unused devices.
 
 ---------------------------------------------------------------------------
+
 Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
 after a reboot.  What happened?
 
@@ -533,6 +544,7 @@ on how to determine which vty-server goes with which /dev/hvcs* node.
 Hint; look at the sysfs "index" attribute for the vty-server.
 
 ---------------------------------------------------------------------------
+
 Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
 device on that partition as the other end of the pipe?
 
@@ -554,7 +566,9 @@ read or write to /dev/hvcs*.  Now you have a tty conduit between two
 partitions.
 
 ---------------------------------------------------------------------------
+
 9. Reporting Bugs:
+==================
 
 The proper channel for reporting bugs is either through the Linux OS
 distribution company that provided your OS or by posting issues to the
diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
new file mode 100644
index 000000000000..549b1cdd77ae
--- /dev/null
+++ b/Documentation/powerpc/index.rst
@@ -0,0 +1,34 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+powerpc
+=======
+
+.. toctree::
+    :maxdepth: 1
+
+    bootwrapper
+    cpu_families
+    cpu_features
+    cxl
+    cxlflash
+    dawr-power9
+    dscr
+    eeh-pci-error-recovery
+    firmware-assisted-dump
+    hvcs
+    isa-versions
+    mpc52xx
+    pci_iov_resource_on_powernv
+    pmu-ebb
+    ptrace
+    qe_firmware
+    syscall64-abi
+    transactional_memory
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/powerpc/isa-versions.rst b/Documentation/powerpc/isa-versions.rst
index 66c24140ebf1..a363d8c1603c 100644
--- a/Documentation/powerpc/isa-versions.rst
+++ b/Documentation/powerpc/isa-versions.rst
@@ -1,13 +1,12 @@
-:orphan:
-
+==========================
 CPU to ISA Version Mapping
 ==========================
 
 Mapping of some CPU versions to relevant ISA versions.
 
-========= ====================
+========= ====================================================================
 CPU       Architecture version
-========= ====================
+========= ====================================================================
 Power9    Power ISA v3.0B
 Power8    Power ISA v2.07
 Power7    Power ISA v2.06
@@ -24,7 +23,7 @@ PPC970    - PowerPC User Instruction Set Architecture Book I v2.01
           - PowerPC Virtual Environment Architecture Book II v2.01
           - PowerPC Operating Environment Architecture Book III v2.01
           - Plus Altivec/VMX ~= 2.03
-========= ====================
+========= ====================================================================
 
 
 Key Features
@@ -60,9 +59,9 @@ Power5     No
 PPC970     No
 ========== ====
 
-========== ====================
+========== ====================================
 CPU        Transactional Memory
-========== ====================
+========== ====================================
 Power9     Yes (* see transactional_memory.txt)
 Power8     Yes
 Power7     No
@@ -73,4 +72,4 @@ Power5++   No
 Power5+    No
 Power5     No
 PPC970     No
-========== ====================
+========== ====================================
diff --git a/Documentation/powerpc/mpc52xx.txt b/Documentation/powerpc/mpc52xx.rst
index 0d540a31ea1a..8676ac63e077 100644
--- a/Documentation/powerpc/mpc52xx.txt
+++ b/Documentation/powerpc/mpc52xx.rst
@@ -1,11 +1,13 @@
+=============================
 Linux 2.6.x on MPC52xx family
------------------------------
+=============================
 
 For the latest info, go to http://www.246tNt.com/mpc52xx/
 
 To compile/use :
 
-  - U-Boot:
+  - U-Boot::
+
      # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
         if you wish to ).
      # make lite5200_defconfig
@@ -16,7 +18,8 @@ To compile/use :
      => tftpboot 400000 pRamdisk
      => bootm 200000 400000
 
-  - DBug:
+  - DBug::
+
      # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
         if you wish to ).
      # make lite5200_defconfig
@@ -28,7 +31,8 @@ To compile/use :
      DBug> dn -i zImage.initrd.lite5200
 
 
-Some remarks :
+Some remarks:
+
  - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
    is not supported, and I'm not sure anyone is interesting in working on it
    so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
diff --git a/Documentation/powerpc/pci_iov_resource_on_powernv.txt b/Documentation/powerpc/pci_iov_resource_on_powernv.rst
index b55c5cd83f8d..f5a5793e1613 100644
--- a/Documentation/powerpc/pci_iov_resource_on_powernv.txt
+++ b/Documentation/powerpc/pci_iov_resource_on_powernv.rst
@@ -1,6 +1,13 @@
+===================================================
+PCI Express I/O Virtualization Resource on Powernv
+===================================================
+
 Wei Yang <weiyang@linux.vnet.ibm.com>
+
 Benjamin Herrenschmidt <benh@au1.ibm.com>
+
 Bjorn Helgaas <bhelgaas@google.com>
+
 26 Aug 2014
 
 This document describes the requirement from hardware for PCI MMIO resource
@@ -10,6 +17,7 @@ Endpoints and the implementation on P8 (IODA2). The next two sections talks
 about considerations on enabling SRIOV on IODA2.
 
 1. Introduction to Partitionable Endpoints
+==========================================
 
 A Partitionable Endpoint (PE) is a way to group the various resources
 associated with a device or a set of devices to provide isolation between
@@ -35,6 +43,7 @@ is a completely separate HW entity that replicates the entire logic, so has
 its own set of PEs, etc.
 
 2. Implementation of Partitionable Endpoints on P8 (IODA2)
+==========================================================
 
 P8 supports up to 256 Partitionable Endpoints per PHB.
 
@@ -149,6 +158,7 @@ P8 supports up to 256 Partitionable Endpoints per PHB.
     sense, but we haven't done it yet.
 
 3. Considerations for SR-IOV on PowerKVM
+========================================
 
   * SR-IOV Background
 
@@ -224,7 +234,7 @@ P8 supports up to 256 Partitionable Endpoints per PHB.
   IODA supports 256 PEs, so segmented windows contain 256 segments, so if
   total_VFs is less than 256, we have the situation in Figure 1.0, where
   segments [total_VFs, 255] of the M64 window may map to some MMIO range on
-  other devices:
+  other devices::
 
      0      1                     total_VFs - 1
      +------+------+-     -+------+------+
@@ -243,7 +253,7 @@ P8 supports up to 256 Partitionable Endpoints per PHB.
 		Figure 1.0 Direct map VF(n) BAR space
 
   Our current solution is to allocate 256 segments even if the VF(n) BAR
-  space doesn't need that much, as shown in Figure 1.1:
+  space doesn't need that much, as shown in Figure 1.1::
 
      0      1                     total_VFs - 1                255
      +------+------+-     -+------+------+-      -+------+------+
@@ -269,6 +279,7 @@ P8 supports up to 256 Partitionable Endpoints per PHB.
   responds to segments [total_VFs, 255].
 
 4. Implications for the Generic PCI Code
+========================================
 
 The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
 aligned to the size of an individual VF BAR.
diff --git a/Documentation/powerpc/pmu-ebb.txt b/Documentation/powerpc/pmu-ebb.rst
index 73cd163dbfb8..4f474758eb55 100644
--- a/Documentation/powerpc/pmu-ebb.txt
+++ b/Documentation/powerpc/pmu-ebb.rst
@@ -1,3 +1,4 @@
+========================
 PMU Event Based Branches
 ========================
 
diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst
new file mode 100644
index 000000000000..864d4b6dddd1
--- /dev/null
+++ b/Documentation/powerpc/ptrace.rst
@@ -0,0 +1,156 @@
+======
+Ptrace
+======
+
+GDB intends to support the following hardware debug features of BookE
+processors:
+
+- 4 hardware breakpoints (IAC)
+- 2 hardware watchpoints (read, write and read-write) (DAC)
+- 2 value conditions for the hardware watchpoints (DVC)
+
+For that, we need to extend ptrace so that GDB can query and set these
+resources. Since we're extending, we're trying to create an interface
+that's extendable and that covers both BookE and server processors, so
+that GDB doesn't need to special-case each of them. We added the
+following 3 new ptrace requests.
+
+1. PTRACE_PPC_GETHWDEBUGINFO
+============================
+
+Query for GDB to discover the hardware debug features. The main info to
+be returned here is the minimum alignment for the hardware watchpoints.
+BookE processors don't have restrictions here, but server processors have
+an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
+adding special cases to GDB based on what it sees in AUXV.
+
+Since we're at it, we added other useful info that the kernel can return to
+GDB: this query will return the number of hardware breakpoints, hardware
+watchpoints and whether it supports a range of addresses and a condition.
+The query will fill the following structure provided by the requesting process::
+
+  struct ppc_debug_info {
+       uint32_t version;
+       uint32_t num_instruction_bps;
+       uint32_t num_data_bps;
+       uint32_t num_condition_regs;
+       uint32_t data_bp_alignment;
+       uint32_t sizeof_condition; /* size of the DVC register */
+       uint64_t features; /* bitmask of the individual flags */
+  };
+
+features will have bits indicating whether there is support for::
+
+  #define PPC_DEBUG_FEATURE_INSN_BP_RANGE		0x1
+  #define PPC_DEBUG_FEATURE_INSN_BP_MASK		0x2
+  #define PPC_DEBUG_FEATURE_DATA_BP_RANGE		0x4
+  #define PPC_DEBUG_FEATURE_DATA_BP_MASK		0x8
+  #define PPC_DEBUG_FEATURE_DATA_BP_DAWR		0x10
+
+2. PTRACE_SETHWDEBUG
+
+Sets a hardware breakpoint or watchpoint, according to the provided structure::
+
+  struct ppc_hw_breakpoint {
+        uint32_t version;
+  #define PPC_BREAKPOINT_TRIGGER_EXECUTE  0x1
+  #define PPC_BREAKPOINT_TRIGGER_READ     0x2
+  #define PPC_BREAKPOINT_TRIGGER_WRITE    0x4
+        uint32_t trigger_type;       /* only some combinations allowed */
+  #define PPC_BREAKPOINT_MODE_EXACT               0x0
+  #define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE     0x1
+  #define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE     0x2
+  #define PPC_BREAKPOINT_MODE_MASK                0x3
+        uint32_t addr_mode;          /* address match mode */
+
+  #define PPC_BREAKPOINT_CONDITION_MODE   0x3
+  #define PPC_BREAKPOINT_CONDITION_NONE   0x0
+  #define PPC_BREAKPOINT_CONDITION_AND    0x1
+  #define PPC_BREAKPOINT_CONDITION_EXACT  0x1	/* different name for the same thing as above */
+  #define PPC_BREAKPOINT_CONDITION_OR     0x2
+  #define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
+  #define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000	/* byte enable bits */
+  #define PPC_BREAKPOINT_CONDITION_BE(n)  (1<<((n)+16))
+        uint32_t condition_mode;     /* break/watchpoint condition flags */
+
+        uint64_t addr;
+        uint64_t addr2;
+        uint64_t condition_value;
+  };
+
+A request specifies one event, not necessarily just one register to be set.
+For instance, if the request is for a watchpoint with a condition, both the
+DAC and DVC registers will be set in the same request.
+
+With this GDB can ask for all kinds of hardware breakpoints and watchpoints
+that the BookE supports. COMEFROM breakpoints available in server processors
+are not contemplated, but that is out of the scope of this work.
+
+ptrace will return an integer (handle) uniquely identifying the breakpoint or
+watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
+request to ask for its removal. Return -ENOSPC if the requested breakpoint
+can't be allocated on the registers.
+
+Some examples of using the structure to:
+
+- set a breakpoint in the first breakpoint register::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) address;
+    p.addr2           = 0;
+    p.condition_value = 0;
+
+- set a watchpoint which triggers on reads in the second watchpoint register::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) address;
+    p.addr2           = 0;
+    p.condition_value = 0;
+
+- set a watchpoint which triggers only with a specific value::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
+    p.addr            = (uint64_t) address;
+    p.addr2           = 0;
+    p.condition_value = (uint64_t) condition;
+
+- set a ranged hardware breakpoint::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) begin_range;
+    p.addr2           = (uint64_t) end_range;
+    p.condition_value = 0;
+
+- set a watchpoint in server processors (BookS)::
+
+    p.version         = 1;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_RW;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+    or
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) begin_range;
+    /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
+     * addr2 - addr <= 8 Bytes.
+     */
+    p.addr2           = (uint64_t) end_range;
+    p.condition_value = 0;
+
+3. PTRACE_DELHWDEBUG
+
+Takes an integer which identifies an existing breakpoint or watchpoint
+(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
+corresponding breakpoint or watchpoint.
diff --git a/Documentation/powerpc/ptrace.txt b/Documentation/powerpc/ptrace.txt
deleted file mode 100644
index 99c5ce88d0fe..000000000000
--- a/Documentation/powerpc/ptrace.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-GDB intends to support the following hardware debug features of BookE
-processors:
-
-4 hardware breakpoints (IAC)
-2 hardware watchpoints (read, write and read-write) (DAC)
-2 value conditions for the hardware watchpoints (DVC)
-
-For that, we need to extend ptrace so that GDB can query and set these
-resources. Since we're extending, we're trying to create an interface
-that's extendable and that covers both BookE and server processors, so
-that GDB doesn't need to special-case each of them. We added the
-following 3 new ptrace requests.
-
-1. PTRACE_PPC_GETHWDEBUGINFO
-
-Query for GDB to discover the hardware debug features. The main info to
-be returned here is the minimum alignment for the hardware watchpoints.
-BookE processors don't have restrictions here, but server processors have
-an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
-adding special cases to GDB based on what it sees in AUXV.
-
-Since we're at it, we added other useful info that the kernel can return to
-GDB: this query will return the number of hardware breakpoints, hardware
-watchpoints and whether it supports a range of addresses and a condition.
-The query will fill the following structure provided by the requesting process:
-
-struct ppc_debug_info {
-       unit32_t version;
-       unit32_t num_instruction_bps;
-       unit32_t num_data_bps;
-       unit32_t num_condition_regs;
-       unit32_t data_bp_alignment;
-       unit32_t sizeof_condition; /* size of the DVC register */
-       uint64_t features; /* bitmask of the individual flags */
-};
-
-features will have bits indicating whether there is support for:
-
-#define PPC_DEBUG_FEATURE_INSN_BP_RANGE		0x1
-#define PPC_DEBUG_FEATURE_INSN_BP_MASK		0x2
-#define PPC_DEBUG_FEATURE_DATA_BP_RANGE		0x4
-#define PPC_DEBUG_FEATURE_DATA_BP_MASK		0x8
-#define PPC_DEBUG_FEATURE_DATA_BP_DAWR		0x10
-
-2. PTRACE_SETHWDEBUG
-
-Sets a hardware breakpoint or watchpoint, according to the provided structure:
-
-struct ppc_hw_breakpoint {
-        uint32_t version;
-#define PPC_BREAKPOINT_TRIGGER_EXECUTE  0x1
-#define PPC_BREAKPOINT_TRIGGER_READ     0x2
-#define PPC_BREAKPOINT_TRIGGER_WRITE    0x4
-        uint32_t trigger_type;       /* only some combinations allowed */
-#define PPC_BREAKPOINT_MODE_EXACT               0x0
-#define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE     0x1
-#define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE     0x2
-#define PPC_BREAKPOINT_MODE_MASK                0x3
-        uint32_t addr_mode;          /* address match mode */
-
-#define PPC_BREAKPOINT_CONDITION_MODE   0x3
-#define PPC_BREAKPOINT_CONDITION_NONE   0x0
-#define PPC_BREAKPOINT_CONDITION_AND    0x1
-#define PPC_BREAKPOINT_CONDITION_EXACT  0x1	/* different name for the same thing as above */
-#define PPC_BREAKPOINT_CONDITION_OR     0x2
-#define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
-#define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000	/* byte enable bits */
-#define PPC_BREAKPOINT_CONDITION_BE(n)  (1<<((n)+16))
-        uint32_t condition_mode;     /* break/watchpoint condition flags */
-
-        uint64_t addr;
-        uint64_t addr2;
-        uint64_t condition_value;
-};
-
-A request specifies one event, not necessarily just one register to be set.
-For instance, if the request is for a watchpoint with a condition, both the
-DAC and DVC registers will be set in the same request.
-
-With this GDB can ask for all kinds of hardware breakpoints and watchpoints
-that the BookE supports. COMEFROM breakpoints available in server processors
-are not contemplated, but that is out of the scope of this work.
-
-ptrace will return an integer (handle) uniquely identifying the breakpoint or
-watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
-request to ask for its removal. Return -ENOSPC if the requested breakpoint
-can't be allocated on the registers.
-
-Some examples of using the structure to:
-
-- set a breakpoint in the first breakpoint register
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) address;
-  p.addr2           = 0;
-  p.condition_value = 0;
-
-- set a watchpoint which triggers on reads in the second watchpoint register
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) address;
-  p.addr2           = 0;
-  p.condition_value = 0;
-
-- set a watchpoint which triggers only with a specific value
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
-  p.addr            = (uint64_t) address;
-  p.addr2           = 0;
-  p.condition_value = (uint64_t) condition;
-
-- set a ranged hardware breakpoint
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) begin_range;
-  p.addr2           = (uint64_t) end_range;
-  p.condition_value = 0;
-
-- set a watchpoint in server processors (BookS)
-
-  p.version         = 1;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_RW;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-  or
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) begin_range;
-  /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
-   * addr2 - addr <= 8 Bytes.
-   */
-  p.addr2           = (uint64_t) end_range;
-  p.condition_value = 0;
-
-3. PTRACE_DELHWDEBUG
-
-Takes an integer which identifies an existing breakpoint or watchpoint
-(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
-corresponding breakpoint or watchpoint..
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.rst
index e7ac24aec4ff..42f5103140c9 100644
--- a/Documentation/powerpc/qe_firmware.txt
+++ b/Documentation/powerpc/qe_firmware.rst
@@ -1,23 +1,23 @@
-	   Freescale QUICC Engine Firmware Uploading
-	   -----------------------------------------
+=========================================
+Freescale QUICC Engine Firmware Uploading
+=========================================
 
 (c) 2007 Timur Tabi <timur at freescale.com>,
     Freescale Semiconductor
 
-Table of Contents
-=================
+.. Table of Contents
 
-  I - Software License for Firmware
+   I - Software License for Firmware
 
-  II - Microcode Availability
+   II - Microcode Availability
 
-  III - Description and Terminology
+   III - Description and Terminology
 
-  IV - Microcode Programming Details
+   IV - Microcode Programming Details
 
-  V - Firmware Structure Layout
+   V - Firmware Structure Layout
 
-  VI - Sample Code for Creating Firmware Files
+   VI - Sample Code for Creating Firmware Files
 
 Revision Information
 ====================
@@ -39,7 +39,7 @@ http://opensource.freescale.com.  For other firmware files, please contact
 your Freescale representative or your operating system vendor.
 
 III - Description and Terminology
-================================
+=================================
 
 In this document, the term 'microcode' refers to the sequence of 32-bit
 integers that compose the actual QE microcode.
@@ -89,7 +89,7 @@ being fixed in the RAM package utilizing they should be activated.  This data
 structure signals the microcode which of these virtual traps is active.
 
 This structure contains 6 words that the application should copy to some
-specific been defined.  This table describes the structure.
+specific been defined.  This table describes the structure::
 
 	---------------------------------------------------------------
 	| Offset in |                  | Destination Offset | Size of |
@@ -119,7 +119,7 @@ Extended Modes
 This is a double word bit array (64 bits) that defines special functionality
 which has an impact on the software drivers.  Each bit has its own impact
 and has special instructions for the s/w associated with it.  This structure is
-described in this table:
+described in this table::
 
 	-----------------------------------------------------------------------
 	| Bit #  |     Name     |   Description                               |
@@ -220,7 +220,8 @@ The 'model' field is a 16-bit number that matches the actual SOC. The
 'major' and 'minor' fields are the major and minor revision numbers,
 respectively, of the SOC.
 
-For example, to match the 8323, revision 1.0:
+For example, to match the 8323, revision 1.0::
+
      soc.model = 8323
      soc.major = 1
      soc.minor = 0
@@ -273,10 +274,10 @@ library and available to any driver that calles qe_get_firmware_info().
 	'reserved'.
 
 After the last microcode is a 32-bit CRC.  It can be calculated using
-this algorithm:
+this algorithm::
 
-u32 crc32(const u8 *p, unsigned int len)
-{
+  u32 crc32(const u8 *p, unsigned int len)
+  {
 	unsigned int i;
 	u32 crc = 0;
 
@@ -286,7 +287,7 @@ u32 crc32(const u8 *p, unsigned int len)
 		   crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
 	}
 	return crc;
-}
+  }
 
 VI - Sample Code for Creating Firmware Files
 ============================================
diff --git a/Documentation/powerpc/syscall64-abi.txt b/Documentation/powerpc/syscall64-abi.rst
index fa716a0d88bd..e49f69f941b9 100644
--- a/Documentation/powerpc/syscall64-abi.txt
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -5,12 +5,12 @@ Power Architecture 64-bit Linux system call ABI
 syscall
 =======
 
-syscall calling sequence[*] matches the Power Architecture 64-bit ELF ABI
+syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI
 specification C function calling sequence, including register preservation
 rules, with the following differences.
 
-[*] Some syscalls (typically low-level management functions) may have
-    different calling sequences (e.g., rt_sigreturn).
+.. [1] Some syscalls (typically low-level management functions) may have
+       different calling sequences (e.g., rt_sigreturn).
 
 Parameters and return value
 ---------------------------
@@ -33,12 +33,14 @@ Register preservation rules
 Register preservation rules match the ELF ABI calling sequence with the
 following differences:
 
-r0:         Volatile.   (System call number.)
-r3:         Volatile.   (Parameter 1, and return value.)
-r4-r8:      Volatile.   (Parameters 2-6.)
-cr0:        Volatile    (cr0.SO is the return error condition)
-cr1, cr5-7: Nonvolatile.
-lr:         Nonvolatile.
+=========== ============= ========================================
+r0          Volatile      (System call number.)
+r3          Volatile      (Parameter 1, and return value.)
+r4-r8       Volatile      (Parameters 2-6.)
+cr0         Volatile      (cr0.SO is the return error condition)
+cr1, cr5-7  Nonvolatile
+lr          Nonvolatile
+=========== ============= ========================================
 
 All floating point and vector data registers as well as control and status
 registers are nonvolatile.
@@ -90,9 +92,12 @@ The vsyscall may or may not use the caller's stack frame save areas.
 
 Register preservation rules
 ---------------------------
-r0: Volatile.
-cr1, cr5-7: Volatile.
-lr: Volatile.
+
+=========== ========
+r0          Volatile
+cr1, cr5-7  Volatile
+lr          Volatile
+=========== ========
 
 Invocation
 ----------
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.rst
index 52c023e14f26..09955103acb4 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.rst
@@ -1,3 +1,4 @@
+============================
 Transactional Memory support
 ============================
 
@@ -17,29 +18,29 @@ instructions are presented to delimit transactions; transactions are
 guaranteed to either complete atomically or roll back and undo any partial
 changes.
 
-A simple transaction looks like this:
+A simple transaction looks like this::
 
-begin_move_money:
-  tbegin
-  beq   abort_handler
+  begin_move_money:
+    tbegin
+    beq   abort_handler
 
-  ld    r4, SAVINGS_ACCT(r3)
-  ld    r5, CURRENT_ACCT(r3)
-  subi  r5, r5, 1
-  addi  r4, r4, 1
-  std   r4, SAVINGS_ACCT(r3)
-  std   r5, CURRENT_ACCT(r3)
+    ld    r4, SAVINGS_ACCT(r3)
+    ld    r5, CURRENT_ACCT(r3)
+    subi  r5, r5, 1
+    addi  r4, r4, 1
+    std   r4, SAVINGS_ACCT(r3)
+    std   r5, CURRENT_ACCT(r3)
 
-  tend
+    tend
 
-  b     continue
+    b     continue
 
-abort_handler:
-  ... test for odd failures ...
+  abort_handler:
+    ... test for odd failures ...
 
-  /* Retry the transaction if it failed because it conflicted with
-   * someone else: */
-  b     begin_move_money
+    /* Retry the transaction if it failed because it conflicted with
+     * someone else: */
+    b     begin_move_money
 
 
 The 'tbegin' instruction denotes the start point, and 'tend' the end point.
@@ -123,7 +124,7 @@ Transaction-aware signal handlers can read the transactional register state
 from the second ucontext.  This will be necessary for crash handlers to
 determine, for example, the address of the instruction causing the SIGSEGV.
 
-Example signal handler:
+Example signal handler::
 
     void crash_handler(int sig, siginfo_t *si, void *uc)
     {
@@ -133,9 +134,9 @@ Example signal handler:
       if (ucp_link) {
         u64 msr = ucp->uc_mcontext.regs->msr;
         /* May have transactional ucontext! */
-#ifndef __powerpc64__
+  #ifndef __powerpc64__
         msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
-#endif
+  #endif
         if (MSR_TM_ACTIVE(msr)) {
            /* Yes, we crashed during a transaction.  Oops. */
    fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
@@ -176,6 +177,7 @@ Failure cause codes used by kernel
 These are defined in <asm/reg.h>, and distinguish different reasons why the
 kernel aborted a transaction:
 
+ ====================== ================================
  TM_CAUSE_RESCHED       Thread was rescheduled.
  TM_CAUSE_TLBI          Software TLB invalid.
  TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
@@ -184,6 +186,7 @@ kernel aborted a transaction:
  TM_CAUSE_MISC          Currently unused.
  TM_CAUSE_ALIGNMENT     Alignment fault.
  TM_CAUSE_EMULATE       Emulation that touched memory.
+ ====================== ================================
 
 These can be checked by the user program's abort handler as TEXASR[0:7].  If
 bit 7 is set, it indicates that the error is consider persistent.  For example
@@ -203,7 +206,7 @@ POWER9
 ======
 
 TM on POWER9 has issues with storing the complete register state. This
-is described in this commit:
+is described in this commit::
 
     commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7
     Author: Paul Mackerras <paulus@ozlabs.org>
diff --git a/Documentation/process/conf.py b/Documentation/process/conf.py
deleted file mode 100644
index 1b01a80ad9ce..000000000000
--- a/Documentation/process/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Development Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'process.tex', 'Linux Kernel Development Documentation',
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/sh/conf.py b/Documentation/sh/conf.py
deleted file mode 100644
index 1eb684a13ac8..000000000000
--- a/Documentation/sh/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "SuperH architecture implementation manual"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'sh.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/sound/conf.py b/Documentation/sound/conf.py
deleted file mode 100644
index 3f1fc5e74e7b..000000000000
--- a/Documentation/sound/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Sound Subsystem Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'sound.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py
index 301a21aa4f63..eeb394b39e2c 100644
--- a/Documentation/sphinx/load_config.py
+++ b/Documentation/sphinx/load_config.py
@@ -21,6 +21,29 @@ def loadConfig(namespace):
         and os.path.normpath(namespace["__file__"]) != os.path.normpath(config_file) ):
         config_file = os.path.abspath(config_file)
 
+        # Let's avoid one conf.py file just due to latex_documents
+        start = config_file.find('Documentation/')
+        if start >= 0:
+            start = config_file.find('/', start + 1)
+
+        end = config_file.rfind('/')
+        if start >= 0 and end > 0:
+            dir = config_file[start + 1:end]
+
+            print("source directory: %s" % dir)
+            new_latex_docs = []
+            latex_documents = namespace['latex_documents']
+
+            for l in latex_documents:
+                if l[0].find(dir + '/') == 0:
+                    has = True
+                    fn = l[0][len(dir) + 1:]
+                    new_latex_docs.append((fn, l[1], l[2], l[3], l[4]))
+                    break
+
+            namespace['latex_documents'] = new_latex_docs
+
+        # If there is an extra conf.py file, load it
         if os.path.isfile(config_file):
             sys.stdout.write("load additional sphinx-config: %s\n" % config_file)
             config = namespace.copy()
@@ -29,4 +52,6 @@ def loadConfig(namespace):
             del config['__file__']
             namespace.update(config)
         else:
-            sys.stderr.write("WARNING: additional sphinx-config not found: %s\n" % config_file)
+            config = namespace.copy()
+            config['tags'].add("subproject")
+            namespace.update(config)
diff --git a/Documentation/translations/it_IT/doc-guide/sphinx.rst b/Documentation/translations/it_IT/doc-guide/sphinx.rst
index 1739cba8863e..f1ad4504b734 100644
--- a/Documentation/translations/it_IT/doc-guide/sphinx.rst
+++ b/Documentation/translations/it_IT/doc-guide/sphinx.rst
@@ -242,8 +242,9 @@ del kernel:
 * Per inserire blocchi di testo con caratteri a dimensione fissa (codici di
   esempio, casi d'uso, eccetera): utilizzate ``::`` quando non è necessario
   evidenziare la sintassi, specialmente per piccoli frammenti; invece,
-  utilizzate ``.. code-block:: <language>`` per blocchi di più lunghi che
-  potranno beneficiare dell'avere la sintassi evidenziata.
+  utilizzate ``.. code-block:: <language>`` per blocchi più lunghi che
+  beneficeranno della sintassi evidenziata. Per un breve pezzo di codice da
+  inserire nel testo, usate \`\`.
 
 
 Il dominio C
@@ -267,12 +268,14 @@ molto comune come ``open`` o ``ioctl``:
 
 Il nome della funzione (per esempio ioctl) rimane nel testo ma il nome del suo
 riferimento cambia da ``ioctl`` a ``VIDIOC_LOG_STATUS``. Anche la voce
-nell'indice cambia in ``VIDIOC_LOG_STATUS`` e si potrà quindi fare riferimento
-a questa funzione scrivendo:
-
-.. code-block:: rst
-
-     :c:func:`VIDIOC_LOG_STATUS`
+nell'indice cambia in ``VIDIOC_LOG_STATUS``.
+
+Notate che per una funzione non c'è bisogno di usare ``c:func:`` per generarne
+i riferimenti nella documentazione. Grazie a qualche magica estensione a
+Sphinx, il sistema di generazione della documentazione trasformerà
+automaticamente un riferimento ad una ``funzione()`` in un riferimento
+incrociato quando questa ha una voce nell'indice.  Se trovate degli usi di
+``c:func:`` nella documentazione del kernel, sentitevi liberi di rimuoverli.
 
 
 Tabelle a liste
diff --git a/Documentation/translations/it_IT/process/index.rst b/Documentation/translations/it_IT/process/index.rst
index 2eda85d5cd1e..012de0f3154a 100644
--- a/Documentation/translations/it_IT/process/index.rst
+++ b/Documentation/translations/it_IT/process/index.rst
@@ -27,6 +27,7 @@ Di seguito le guide che ogni sviluppatore dovrebbe leggere.
    code-of-conduct
    development-process
    submitting-patches
+   programming-language
    coding-style
    maintainer-pgp-guide
    email-clients
diff --git a/Documentation/translations/it_IT/process/kernel-docs.rst b/Documentation/translations/it_IT/process/kernel-docs.rst
index 7bd70d661737..38e0a955121a 100644
--- a/Documentation/translations/it_IT/process/kernel-docs.rst
+++ b/Documentation/translations/it_IT/process/kernel-docs.rst
@@ -1,6 +1,7 @@
 .. include:: ../disclaimer-ita.rst
 
 :Original: :ref:`Documentation/process/kernel-docs.rst <kernel_docs>`
+:Translator: Federico Vaga <federico.vaga@vaga.pv.it>
 
 
 .. _it_kernel_docs:
@@ -8,6 +9,10 @@
 Indice di documenti per le persone interessate a capire e/o scrivere per il kernel Linux
 ========================================================================================
 
-.. warning::
-
-    TODO ancora da tradurre
+.. note::
+   Questo documento contiene riferimenti a documenti in lingua inglese; inoltre
+   utilizza dei campi *ReStructuredText* di supporto alla ricerca e che per
+   questo motivo è meglio non tradurre al fine di garantirne un corretto
+   utilizzo.
+   Per questi motivi il documento non verrà tradotto. Per favore fate
+   riferimento al documento originale in lingua inglese.
diff --git a/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst b/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst
index 276db0e37f43..118fb4153e8f 100644
--- a/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst
+++ b/Documentation/translations/it_IT/process/maintainer-pgp-guide.rst
@@ -248,7 +248,10 @@ possano ricevere la vostra nuova sottochiave::
     kernel.
 
     Se per qualche ragione preferite rimanere con sottochiavi RSA, nel comando
-    precedente, sostituite "ed25519" con "rsa2048".
+    precedente, sostituite "ed25519" con "rsa2048". In aggiunta, se avete
+    intenzione di usare un dispositivo hardware che non supporta le chiavi
+    ED25519 ECC, come la Nitrokey Pro o la Yubikey, allora dovreste usare
+    "nistp256" al posto di "ed25519".
 
 Copia di riserva della chiave primaria per gestire il recupero da disastro
 --------------------------------------------------------------------------
@@ -449,23 +452,27 @@ implementi le funzionalità delle smartcard.  Sul mercato ci sono diverse
 soluzioni disponibili:
 
 - `Nitrokey Start`_: è Open hardware e Free Software, è basata sul progetto
-  `GnuK`_ della FSIJ. Ha il supporto per chiavi ECC, ma meno funzionalità di
-  sicurezza (come la resistenza alla manomissione o alcuni attacchi ad un
-  canale laterale).
+  `GnuK`_ della FSIJ. Questo è uno dei pochi dispositivi a supportare le chiavi
+  ECC ED25519, ma offre meno funzionalità di sicurezza (come la resistenza
+  alla manomissione o alcuni attacchi ad un canale laterale).
 - `Nitrokey Pro`_: è simile alla Nitrokey Start, ma è più resistente alla
-  manomissione e offre più funzionalità di sicurezza, ma l'ECC.
-- `Yubikey 4`_: l'hardware e il software sono proprietari, ma è più economica
+  manomissione e offre più funzionalità di sicurezza. La Pro 2 supporta la
+  crittografia ECC (NISTP).
+- `Yubikey 5`_: l'hardware e il software sono proprietari, ma è più economica
   della  Nitrokey Pro ed è venduta anche con porta USB-C il che è utile con i
   computer portatili più recenti. In aggiunta, offre altre funzionalità di
-  sicurezza come FIDO, U2F, ma non l'ECC
+  sicurezza come FIDO, U2F, e ora supporta anche le chiavi ECC (NISTP)
 
 `Su LWN c'è una buona recensione`_ dei modelli elencati qui sopra e altri.
+La scelta dipenderà dal costo, dalla disponibilità nella vostra area
+geografica e dalle vostre considerazioni sull'hardware aperto/proprietario.
+
 Se volete usare chiavi ECC, la vostra migliore scelta sul mercato è la
 Nitrokey Start.
 
 .. _`Nitrokey Start`: https://shop.nitrokey.com/shop/product/nitrokey-start-6
-.. _`Nitrokey Pro`: https://shop.nitrokey.com/shop/product/nitrokey-pro-3
-.. _`Yubikey 4`: https://www.yubico.com/product/yubikey-4-series/
+.. _`Nitrokey Pro`: https://shop.nitrokey.com/shop/product/nitrokey-pro-2-3
+.. _`Yubikey 5`: https://www.yubico.com/product/yubikey-5-overview/
 .. _Gnuk: http://www.fsij.org/doc-gnuk/
 .. _`Su LWN c'è una buona recensione`: https://lwn.net/Articles/736231/
 
diff --git a/Documentation/translations/it_IT/process/programming-language.rst b/Documentation/translations/it_IT/process/programming-language.rst
new file mode 100644
index 000000000000..f4b006395849
--- /dev/null
+++ b/Documentation/translations/it_IT/process/programming-language.rst
@@ -0,0 +1,51 @@
+.. include:: ../disclaimer-ita.rst
+
+:Original: :ref:`Documentation/process/programming-language.rst <programming_language>`
+:Translator: Federico Vaga <federico.vaga@vaga.pv.it>
+
+.. _it_programming_language:
+
+Linguaggio di programmazione
+============================
+
+Il kernel è scritto nel linguaggio di programmazione C [c-language]_.
+Più precisamente, il kernel viene compilato con ``gcc`` [gcc]_ usando
+l'opzione ``-std=gnu89`` [gcc-c-dialect-options]_: il dialetto GNU
+dello standard ISO C90 (con l'aggiunta di alcune funzionalità da C99).
+
+Questo dialetto contiene diverse estensioni al linguaggio [gnu-extensions]_,
+e molte di queste vengono usate sistematicamente dal kernel.
+
+Il kernel offre un certo livello di supporto per la compilazione con ``clang``
+[clang]_ e ``icc`` [icc]_ su diverse architetture, tuttavia in questo momento
+il supporto non è completo e richiede delle patch aggiuntive.
+
+Attributi
+---------
+
+Una delle estensioni più comuni e usate nel kernel sono gli attributi
+[gcc-attribute-syntax]_. Gli attributi permettono di aggiungere una semantica,
+definita dall'implementazione, alle entità del linguaggio (come le variabili,
+le funzioni o i tipi) senza dover fare importanti modifiche sintattiche al
+linguaggio stesso (come l'aggiunta di nuove parole chiave) [n2049]_.
+
+In alcuni casi, gli attributi sono opzionali (ovvero un compilatore che non
+dovesse supportarli dovrebbe produrre comunque codice corretto, anche se
+più lento o che non esegue controlli aggiuntivi durante la compilazione).
+
+Il kernel definisce alcune pseudo parole chiave (per esempio ``__pure``)
+in alternativa alla sintassi GNU per gli attributi (per esempio
+``__attribute__((__pure__))``) allo scopo di mostrare quali funzionalità si
+possono usare e/o per accorciare il codice.
+
+Per maggiori informazioni consultate il file d'intestazione
+``include/linux/compiler_attributes.h``.
+
+.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [gcc] https://gcc.gnu.org
+.. [clang] https://clang.llvm.org
+.. [icc] https://software.intel.com/en-us/c-compilers
+.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index a33c2a536542..2774624ee843 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -569,7 +569,7 @@ ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE �
 
 	[*] 버스 마스터링 DMA 와 일관성에 대해서는 다음을 참고하시기 바랍니다:
 
-	    Documentation/PCI/pci.rst
+	    Documentation/driver-api/pci/pci.rst
 	    Documentation/DMA-API-HOWTO.txt
 	    Documentation/DMA-API.txt
 
diff --git a/Documentation/userspace-api/conf.py b/Documentation/userspace-api/conf.py
deleted file mode 100644
index 2eaf59f844e5..000000000000
--- a/Documentation/userspace-api/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux kernel user-space API guide"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'userspace-api.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/virtual/index.rst b/Documentation/virt/index.rst
index 062ffb527043..062ffb527043 100644
--- a/Documentation/virtual/index.rst
+++ b/Documentation/virt/index.rst
diff --git a/Documentation/virtual/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index d18c97b4e140..d18c97b4e140 100644
--- a/Documentation/virtual/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 2cd6250b2896..2d067767b617 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -3781,7 +3781,7 @@ encrypted VMs.
 
 Currently, this ioctl is used for issuing Secure Encrypted Virtualization
 (SEV) commands on AMD Processors. The SEV commands are defined in
-Documentation/virtual/kvm/amd-memory-encryption.rst.
+Documentation/virt/kvm/amd-memory-encryption.rst.
 
 4.111 KVM_MEMORY_ENCRYPT_REG_REGION
 
@@ -4090,17 +4090,22 @@ Parameters: struct kvm_pmu_event_filter (in)
 Returns: 0 on success, -1 on error
 
 struct kvm_pmu_event_filter {
-       __u32 action;
-       __u32 nevents;
-       __u64 events[0];
+	__u32 action;
+	__u32 nevents;
+	__u32 fixed_counter_bitmap;
+	__u32 flags;
+	__u32 pad[4];
+	__u64 events[0];
 };
 
 This ioctl restricts the set of PMU events that the guest can program.
 The argument holds a list of events which will be allowed or denied.
 The eventsel+umask of each event the guest attempts to program is compared
 against the events field to determine whether the guest should have access.
-This only affects general purpose counters; fixed purpose counters can
-be disabled by changing the perfmon CPUID leaf.
+The events field only controls general purpose counters; fixed purpose
+counters are controlled by the fixed_counter_bitmap.
+
+No flags are defined yet; the field must be zero.
 
 Valid values for 'action':
 #define KVM_PMU_EVENT_ALLOW 0
diff --git a/Documentation/virtual/kvm/arm/hyp-abi.txt b/Documentation/virt/kvm/arm/hyp-abi.txt
index a20a0bee268d..a20a0bee268d 100644
--- a/Documentation/virtual/kvm/arm/hyp-abi.txt
+++ b/Documentation/virt/kvm/arm/hyp-abi.txt
diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virt/kvm/arm/psci.txt
index 559586fc9d37..559586fc9d37 100644
--- a/Documentation/virtual/kvm/arm/psci.txt
+++ b/Documentation/virt/kvm/arm/psci.txt
diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
index 01b081f6e7ea..01b081f6e7ea 100644
--- a/Documentation/virtual/kvm/cpuid.rst
+++ b/Documentation/virt/kvm/cpuid.rst
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virt/kvm/devices/README
index 34a69834124a..34a69834124a 100644
--- a/Documentation/virtual/kvm/devices/README
+++ b/Documentation/virt/kvm/devices/README
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virt/kvm/devices/arm-vgic-its.txt
index eeaa95b893a8..eeaa95b893a8 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virt/kvm/devices/arm-vgic-its.txt
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virt/kvm/devices/arm-vgic-v3.txt
index ff290b43c8e5..ff290b43c8e5 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virt/kvm/devices/arm-vgic-v3.txt
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virt/kvm/devices/arm-vgic.txt
index 97b6518148f8..97b6518148f8 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virt/kvm/devices/arm-vgic.txt
diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virt/kvm/devices/mpic.txt
index 8257397adc3c..8257397adc3c 100644
--- a/Documentation/virtual/kvm/devices/mpic.txt
+++ b/Documentation/virt/kvm/devices/mpic.txt
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virt/kvm/devices/s390_flic.txt
index a4e20a090174..a4e20a090174 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virt/kvm/devices/s390_flic.txt
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt
index 2b5dab16c4f2..2b5dab16c4f2 100644
--- a/Documentation/virtual/kvm/devices/vcpu.txt
+++ b/Documentation/virt/kvm/devices/vcpu.txt
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virt/kvm/devices/vfio.txt
index 528c77c8022c..528c77c8022c 100644
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ b/Documentation/virt/kvm/devices/vfio.txt
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virt/kvm/devices/vm.txt
index 4ffb82b02468..4ffb82b02468 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virt/kvm/devices/vm.txt
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virt/kvm/devices/xics.txt
index 42864935ac5d..42864935ac5d 100644
--- a/Documentation/virtual/kvm/devices/xics.txt
+++ b/Documentation/virt/kvm/devices/xics.txt
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virt/kvm/devices/xive.txt
index 9a24a4525253..9a24a4525253 100644
--- a/Documentation/virtual/kvm/devices/xive.txt
+++ b/Documentation/virt/kvm/devices/xive.txt
diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virt/kvm/halt-polling.txt
index 4f791b128dd2..4f791b128dd2 100644
--- a/Documentation/virtual/kvm/halt-polling.txt
+++ b/Documentation/virt/kvm/halt-polling.txt
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virt/kvm/hypercalls.txt
index da210651f714..5f6d291bd004 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virt/kvm/hypercalls.txt
@@ -18,7 +18,7 @@ S390:
   number in R1.
 
   For further information on the S390 diagnose call as supported by KVM,
-  refer to Documentation/virtual/kvm/s390-diag.txt.
+  refer to Documentation/virt/kvm/s390-diag.txt.
 
  PowerPC:
   It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
@@ -26,7 +26,7 @@ S390:
 
   KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
   property inside the device tree's /hypervisor node.
-  For more information refer to Documentation/virtual/kvm/ppc-pv.txt
+  For more information refer to Documentation/virt/kvm/ppc-pv.txt
 
 MIPS:
   KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virt/kvm/index.rst
index 0b206a06f5be..ada224a511fe 100644
--- a/Documentation/virtual/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -9,3 +9,4 @@ KVM
 
    amd-memory-encryption
    cpuid
+   vcpu-requests
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virt/kvm/locking.txt
index 635cd6eaf714..635cd6eaf714 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virt/kvm/locking.txt
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virt/kvm/mmu.txt
index 2efe0efc516e..1b9880dfba0a 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virt/kvm/mmu.txt
@@ -298,7 +298,7 @@ Handling a page fault is performed as follows:
      vcpu->arch.mmio_gfn, and call the emulator
  - If both P bit and R/W bit of error code are set, this could possibly
    be handled as a "fast page fault" (fixed without taking the MMU lock).  See
-   the description in Documentation/virtual/kvm/locking.txt.
+   the description in Documentation/virt/kvm/locking.txt.
  - if needed, walk the guest page tables to determine the guest translation
    (gva->gpa or ngpa->gpa)
    - if permissions are insufficient, reflect the fault back to the guest
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virt/kvm/msr.txt
index df1f4338b3ca..df1f4338b3ca 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virt/kvm/msr.txt
diff --git a/Documentation/virtual/kvm/nested-vmx.txt b/Documentation/virt/kvm/nested-vmx.txt
index 97eb1353e962..97eb1353e962 100644
--- a/Documentation/virtual/kvm/nested-vmx.txt
+++ b/Documentation/virt/kvm/nested-vmx.txt
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virt/kvm/ppc-pv.txt
index e26115ce4258..e26115ce4258 100644
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ b/Documentation/virt/kvm/ppc-pv.txt
diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virt/kvm/review-checklist.txt
index a83b27635fdd..499af499e296 100644
--- a/Documentation/virtual/kvm/review-checklist.txt
+++ b/Documentation/virt/kvm/review-checklist.txt
@@ -7,7 +7,7 @@ Review checklist for kvm patches
 2.  Patches should be against kvm.git master branch.
 
 3.  If the patch introduces or modifies a new userspace API:
-    - the API must be documented in Documentation/virtual/kvm/api.txt
+    - the API must be documented in Documentation/virt/kvm/api.txt
     - the API must be discoverable using KVM_CHECK_EXTENSION
 
 4.  New state must include support for save/restore.
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virt/kvm/s390-diag.txt
index 7c52e5f8b210..7c52e5f8b210 100644
--- a/Documentation/virtual/kvm/s390-diag.txt
+++ b/Documentation/virt/kvm/s390-diag.txt
diff --git a/Documentation/virtual/kvm/timekeeping.txt b/Documentation/virt/kvm/timekeeping.txt
index 76808a17ad84..76808a17ad84 100644
--- a/Documentation/virtual/kvm/timekeeping.txt
+++ b/Documentation/virt/kvm/timekeeping.txt
diff --git a/Documentation/virtual/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst
index 5feb3706a7ae..5feb3706a7ae 100644
--- a/Documentation/virtual/kvm/vcpu-requests.rst
+++ b/Documentation/virt/kvm/vcpu-requests.rst
diff --git a/Documentation/virtual/paravirt_ops.rst b/Documentation/virt/paravirt_ops.rst
index 6b789d27cead..6b789d27cead 100644
--- a/Documentation/virtual/paravirt_ops.rst
+++ b/Documentation/virt/paravirt_ops.rst
diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virt/uml/UserModeLinux-HOWTO.txt
index 87b80f589e1c..87b80f589e1c 100644
--- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
+++ b/Documentation/virt/uml/UserModeLinux-HOWTO.txt
diff --git a/Documentation/vm/conf.py b/Documentation/vm/conf.py
deleted file mode 100644
index 3b0b601af558..000000000000
--- a/Documentation/vm/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Memory Management Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'memory-management.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/Documentation/watchdog/hpwdt.rst b/Documentation/watchdog/hpwdt.rst
index c165d92cfd12..c824cd7f6e32 100644
--- a/Documentation/watchdog/hpwdt.rst
+++ b/Documentation/watchdog/hpwdt.rst
@@ -63,7 +63,7 @@ Last reviewed: 08/20/2018
  and loop forever.  This is generally not what a watchdog user wants.
 
  For those wishing to learn more please see:
-	Documentation/kdump/kdump.rst
+	Documentation/admin-guide/kdump/kdump.rst
 	Documentation/admin-guide/kernel-parameters.txt (panic=)
 	Your Linux Distribution specific documentation.
 
diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py
deleted file mode 100644
index 33c5c3142e20..000000000000
--- a/Documentation/x86/conf.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# -*- coding: utf-8; mode: python -*-
-
-project = "X86 architecture specific documentation"
-
-tags.add("subproject")
-
-latex_documents = [
-    ('index', 'x86.tex', project,
-     'The kernel development community', 'manual'),
-]
diff --git a/MAINTAINERS b/MAINTAINERS
index bd3fe4fe13c4..4b59bdc1aaf2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -899,7 +899,7 @@ L:	linux-iio@vger.kernel.org
 W:	http://ez.analog.com/community/linux-device-drivers
 S:	Supported
 F:	drivers/iio/adc/ad7124.c
-F:	Documentation/devicetree/bindings/iio/adc/adi,ad7124.txt
+F:	Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
 
 ANALOG DEVICES INC AD7606 DRIVER
 M:	Stefan Popa <stefan.popa@analog.com>
@@ -1194,7 +1194,7 @@ F:	include/uapi/linux/if_arcnet.h
 
 ARM ARCHITECTED TIMER DRIVER
 M:	Mark Rutland <mark.rutland@arm.com>
-M:	Marc Zyngier <marc.zyngier@arm.com>
+M:	Marc Zyngier <maz@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/include/asm/arch_timer.h
@@ -4190,7 +4190,7 @@ M:	Jens Axboe <axboe@kernel.dk>
 L:	cgroups@vger.kernel.org
 L:	linux-block@vger.kernel.org
 T:	git git://git.kernel.dk/linux-block
-F:	Documentation/cgroup-v1/blkio-controller.rst
+F:	Documentation/admin-guide/cgroup-v1/blkio-controller.rst
 F:	block/blk-cgroup.c
 F:	include/linux/blk-cgroup.h
 F:	block/blk-throttle.c
@@ -4469,7 +4469,7 @@ F:	arch/powerpc/platforms/powernv/pci-cxl.c
 F:	drivers/misc/cxl/
 F:	include/misc/cxl*
 F:	include/uapi/misc/cxl.h
-F:	Documentation/powerpc/cxl.txt
+F:	Documentation/powerpc/cxl.rst
 F:	Documentation/ABI/testing/sysfs-class-cxl
 
 CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER
@@ -4480,7 +4480,7 @@ L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/cxlflash/
 F:	include/uapi/scsi/cxlflash_ioctl.h
-F:	Documentation/powerpc/cxlflash.txt
+F:	Documentation/powerpc/cxlflash.rst
 
 CYBERPRO FB DRIVER
 M:	Russell King <linux@armlinux.org.uk>
@@ -6856,7 +6856,7 @@ R:	Sagi Shahar <sagis@google.com>
 R:	Jon Olson <jonolson@google.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	Documentation/networking/device_drivers/google/gve.txt
+F:	Documentation/networking/device_drivers/google/gve.rst
 F:	drivers/net/ethernet/google
 
 GPD POCKET FAN DRIVER
@@ -8490,7 +8490,7 @@ S:	Obsolete
 F:	include/uapi/linux/ipx.h
 
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
-M:	Marc Zyngier <marc.zyngier@arm.com>
+M:	Marc Zyngier <maz@kernel.org>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
 F:	Documentation/IRQ-domain.txt
@@ -8508,7 +8508,7 @@ F:	kernel/irq/
 IRQCHIP DRIVERS
 M:	Thomas Gleixner <tglx@linutronix.de>
 M:	Jason Cooper <jason@lakedaemon.net>
-M:	Marc Zyngier <marc.zyngier@arm.com>
+M:	Marc Zyngier <maz@kernel.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
@@ -8808,7 +8808,7 @@ L:	kvm@vger.kernel.org
 W:	http://www.linux-kvm.org
 T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
 S:	Supported
-F:	Documentation/virtual/kvm/
+F:	Documentation/virt/kvm/
 F:	include/trace/events/kvm.h
 F:	include/uapi/asm-generic/kvm*
 F:	include/uapi/linux/kvm*
@@ -8828,10 +8828,10 @@ F:	arch/x86/include/asm/svm.h
 F:	arch/x86/kvm/svm.c
 
 KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64)
-M:	Marc Zyngier <marc.zyngier@arm.com>
+M:	Marc Zyngier <maz@kernel.org>
 R:	James Morse <james.morse@arm.com>
-R:	Julien Thierry <julien.thierry@arm.com>
-R:	Suzuki K Pouloze <suzuki.poulose@arm.com>
+R:	Julien Thierry <julien.thierry.kdev@gmail.com>
+R:	Suzuki K Poulose <suzuki.poulose@arm.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	kvmarm@lists.cs.columbia.edu
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
@@ -8878,6 +8878,8 @@ F:	arch/s390/include/asm/gmap.h
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/kvm/
 F:	arch/s390/mm/gmap.c
+F:	tools/testing/selftests/kvm/s390x/
+F:	tools/testing/selftests/kvm/*/s390x/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:	Paolo Bonzini <pbonzini@redhat.com>
@@ -12131,10 +12133,11 @@ F:	Documentation/driver-api/parport*.rst
 
 PARAVIRT_OPS INTERFACE
 M:	Juergen Gross <jgross@suse.com>
-M:	Alok Kataria <akataria@vmware.com>
+M:	Thomas Hellstrom <thellstrom@vmware.com>
+M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Supported
-F:	Documentation/virtual/paravirt_ops.txt
+F:	Documentation/virt/paravirt_ops.rst
 F:	arch/*/kernel/paravirt*
 F:	arch/*/include/asm/paravirt*.h
 F:	include/linux/hypervisor.h
@@ -12391,7 +12394,7 @@ F:	Documentation/PCI/pci-error-recovery.rst
 F:	drivers/pci/pcie/aer.c
 F:	drivers/pci/pcie/dpc.c
 F:	drivers/pci/pcie/err.c
-F:	Documentation/powerpc/eeh-pci-error-recovery.txt
+F:	Documentation/powerpc/eeh-pci-error-recovery.rst
 F:	arch/powerpc/kernel/eeh*.c
 F:	arch/powerpc/platforms/*/eeh*.c
 F:	arch/powerpc/include/*/eeh*.h
@@ -13722,6 +13725,7 @@ F:	drivers/mtd/nand/raw/r852.c
 F:	drivers/mtd/nand/raw/r852.h
 
 RISC-V ARCHITECTURE
+M:	Paul Walmsley <paul.walmsley@sifive.com>
 M:	Palmer Dabbelt <palmer@sifive.com>
 M:	Albert Ou <aou@eecs.berkeley.edu>
 L:	linux-riscv@lists.infradead.org
@@ -16851,7 +16855,7 @@ W:	http://user-mode-linux.sourceforge.net
 Q:	https://patchwork.ozlabs.org/project/linux-um/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git
 S:	Maintained
-F:	Documentation/virtual/uml/
+F:	Documentation/virt/uml/
 F:	arch/um/
 F:	arch/x86/um/
 F:	fs/hostfs/
@@ -17120,7 +17124,7 @@ F:	drivers/virtio/virtio_input.c
 F:	include/uapi/linux/virtio_input.h
 
 VIRTIO IOMMU DRIVER
-M:	Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+M:	Jean-Philippe Brucker <jean-philippe@linaro.org>
 L:	virtualization@lists.linux-foundation.org
 S:	Maintained
 F:	drivers/iommu/virtio-iommu.c
@@ -17177,7 +17181,8 @@ S:	Maintained
 F:	drivers/misc/vmw_balloon.c
 
 VMWARE HYPERVISOR INTERFACE
-M:	Alok Kataria <akataria@vmware.com>
+M:	Thomas Hellstrom <thellstrom@vmware.com>
+M:	"VMware, Inc." <pv-drivers@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Supported
 F:	arch/x86/kernel/cpu/vmware.c
diff --git a/Makefile b/Makefile
index 2c5d00ba537e..9be5834073f8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 2
+PATCHLEVEL = 3
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Bobtail Squid
 
 # *DOCUMENTATION*
@@ -486,11 +486,6 @@ export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
 export KBUILD_ARFLAGS
 
-# When compiling out-of-tree modules, put MODVERDIR in the module
-# tree rather than in the kernel tree. The kernel tree might
-# even be read-only.
-export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions
-
 # Files to ignore in find ... statements
 
 export RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o    \
@@ -887,6 +882,12 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=designated-init)
 # change __FILE__ to the relative path from the srctree
 KBUILD_CFLAGS	+= $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 
+# ensure -fcf-protection is disabled when using retpoline as it is
+# incompatible with -mindirect-branch=thunk-extern
+ifdef CONFIG_RETPOLINE
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
+endif
+
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)
 
@@ -900,10 +901,8 @@ KBUILD_CPPFLAGS += $(ARCH_CPPFLAGS) $(KCPPFLAGS)
 KBUILD_AFLAGS   += $(ARCH_AFLAGS)   $(KAFLAGS)
 KBUILD_CFLAGS   += $(ARCH_CFLAGS)   $(KCFLAGS)
 
-# Use --build-id when available.
-LDFLAGS_BUILD_ID := $(call ld-option, --build-id)
-KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
-LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
+KBUILD_LDFLAGS_MODULE += --build-id
+LDFLAGS_vmlinux += --build-id
 
 ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
 LDFLAGS_vmlinux	+= $(call ld-option, -X,)
@@ -1031,8 +1030,8 @@ vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS)
 
 # Recurse until adjust_autoksyms.sh is satisfied
 PHONY += autoksyms_recursive
-autoksyms_recursive: $(vmlinux-deps)
 ifdef CONFIG_TRIM_UNUSED_KSYMS
+autoksyms_recursive: $(vmlinux-deps) modules.order
 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
 	  "$(MAKE) -f $(srctree)/Makefile vmlinux"
 endif
@@ -1074,7 +1073,7 @@ $(sort $(vmlinux-deps)): $(vmlinux-dirs) ;
 
 PHONY += $(vmlinux-dirs)
 $(vmlinux-dirs): prepare
-	$(Q)$(MAKE) $(build)=$@ need-builtin=1
+	$(Q)$(MAKE) $(build)=$@ need-builtin=1 need-modorder=1
 
 filechk_kernel.release = \
 	echo "$(KERNELVERSION)$$($(CONFIG_SHELL) $(srctree)/scripts/setlocalversion $(srctree))"
@@ -1096,7 +1095,7 @@ scripts: scripts_basic scripts_dtc
 # archprepare is used in arch Makefiles and when processed asm symlink,
 # version.h and scripts_basic is processed / created.
 
-PHONY += prepare archprepare prepare1 prepare3
+PHONY += prepare archprepare prepare3
 
 # prepare3 is used to check if we are building in a separate output directory,
 # and if so do:
@@ -1113,11 +1112,8 @@ ifdef building_out_of_srctree
 	fi;
 endif
 
-prepare1: prepare3 outputmakefile asm-generic $(version_h) $(autoksyms_h) \
-						include/generated/utsrelease.h
-	$(cmd_crmodverdir)
-
-archprepare: archheaders archscripts prepare1 scripts
+archprepare: archheaders archscripts scripts prepare3 outputmakefile \
+	asm-generic $(version_h) $(autoksyms_h) include/generated/utsrelease.h
 
 prepare0: archprepare
 	$(Q)$(MAKE) $(build)=scripts/mod
@@ -1331,8 +1327,8 @@ _modinst_:
 		rm -f $(MODLIB)/build ; \
 		ln -s $(CURDIR) $(MODLIB)/build ; \
 	fi
-	@cp -f $(objtree)/modules.order $(MODLIB)/
-	@cp -f $(objtree)/modules.builtin $(MODLIB)/
+	@sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
+	@sed 's:^:kernel/:' modules.builtin > $(MODLIB)/modules.builtin
 	@cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
@@ -1373,18 +1369,22 @@ endif # CONFIG_MODULES
 # make distclean Remove editor backup files, patch leftover files and the like
 
 # Directories & files removed with 'make clean'
-CLEAN_DIRS  += $(MODVERDIR) include/ksym
+CLEAN_DIRS  += include/ksym
 CLEAN_FILES += modules.builtin.modinfo
 
 # Directories & files removed with 'make mrproper'
 MRPROPER_DIRS  += include/config include/generated          \
 		  arch/$(SRCARCH)/include/generated .tmp_objdiff
 MRPROPER_FILES += .config .config.old .version \
-		  Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
+		  Module.symvers \
 		  signing_key.pem signing_key.priv signing_key.x509	\
 		  x509.genkey extra_certificates signing_key.x509.keyid	\
 		  signing_key.x509.signer vmlinux-gdb.py
 
+# Directories & files removed with 'make distclean'
+DISTCLEAN_DIRS  +=
+DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
+
 # clean - Delete most, but leave enough to build external modules
 #
 clean: rm-dirs  := $(CLEAN_DIRS)
@@ -1417,9 +1417,14 @@ mrproper: clean $(mrproper-dirs)
 
 # distclean
 #
+distclean: rm-dirs  := $(wildcard $(DISTCLEAN_DIRS))
+distclean: rm-files := $(wildcard $(DISTCLEAN_FILES))
+
 PHONY += distclean
 
 distclean: mrproper
+	$(call cmd,rmdirs)
+	$(call cmd,rmfiles)
 	@find $(srctree) $(RCS_FIND_IGNORE) \
 		\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
 		-o -name '*.bak' -o -name '#*#' -o -name '*%' \
@@ -1609,7 +1614,7 @@ $(objtree)/Module.symvers:
 module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
 PHONY += $(module-dirs) modules
 $(module-dirs): prepare $(objtree)/Module.symvers
-	$(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
+	$(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) need-modorder=1
 
 modules: $(module-dirs)
 	@$(kecho) '  Building modules, stage 2.';
@@ -1634,7 +1639,6 @@ PHONY += $(clean-dirs) clean
 $(clean-dirs):
 	$(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
 
-clean:	rm-dirs := $(MODVERDIR)
 clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers
 
 PHONY += help
@@ -1648,8 +1652,6 @@ help:
 	@echo  ''
 
 PHONY += prepare
-prepare:
-	$(cmd_crmodverdir)
 endif # KBUILD_EXTMOD
 
 clean: $(clean-dirs)
@@ -1660,7 +1662,7 @@ clean: $(clean-dirs)
 		-o -name '*.ko.*' \
 		-o -name '*.dtb' -o -name '*.dtb.S' -o -name '*.dt.yaml' \
 		-o -name '*.dwo' -o -name '*.lst' \
-		-o -name '*.su'  \
+		-o -name '*.su' -o -name '*.mod' \
 		-o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
 		-o -name '*.lex.c' -o -name '*.tab.[ch]' \
 		-o -name '*.asn1.[ch]' \
@@ -1765,8 +1767,6 @@ build-dir = $(patsubst %/,%,$(dir $(build-target)))
 	$(Q)$(MAKE) $(build)=$(build-dir) $(build-target)
 %.symtypes: prepare FORCE
 	$(Q)$(MAKE) $(build)=$(build-dir) $(build-target)
-%.ko: %.o
-	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
 
 # Modules
 PHONY += /
@@ -1789,11 +1789,6 @@ quiet_cmd_depmod = DEPMOD  $(KERNELRELEASE)
       cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
                    $(KERNELRELEASE)
 
-# Create temporary dir for module support files
-# clean it up only when building all modules
-cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR) \
-                  $(if $(KBUILD_MODULES),; rm -f $(MODVERDIR)/*)
-
 # read saved command lines for existing targets
 existing-targets := $(wildcard $(sort $(targets)))
 
diff --git a/arch/Kconfig b/arch/Kconfig
index ac0fba400ded..a7b57dd42c26 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -796,6 +796,9 @@ config ARCH_NO_COHERENT_DMA_MMAP
 config ARCH_NO_PREEMPT
 	bool
 
+config ARCH_SUPPORTS_RT
+	bool
+
 config CPU_NO_EFFICIENT_FFS
 	def_bool n
 
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index ca85df247775..87b7769214e0 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -13,8 +13,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
 	    -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
-	    $(call ld-option, --hash-style=sysv) \
-	    $(call ld-option, --build-id) \
+	    --hash-style=sysv --build-id \
 	    -T
 
 obj-$(CONFIG_VDSO) += vdso.o
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 3c7037c6ba9b..b618017205a3 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -202,7 +202,7 @@ typedef compat_elf_greg_t		compat_elf_gregset_t[COMPAT_ELF_NGREG];
 ({									\
 	set_thread_flag(TIF_32BIT);					\
  })
-#ifdef CONFIG_GENERIC_COMPAT_VDSO
+#ifdef CONFIG_COMPAT_VDSO
 #define COMPAT_ARCH_DLINFO						\
 do {									\
 	/*								\
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 87a4b2ddc1a1..3f5461f7b560 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -301,7 +301,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
 /*
  * Huge pte definitions.
  */
-#define pte_huge(pte)		(!(pte_val(pte) & PTE_TABLE_BIT))
 #define pte_mkhuge(pte)		(__pte(pte_val(pte) & ~PTE_TABLE_BIT))
 
 /*
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fd5b1a4efc70..844e2964b0f5 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -193,6 +193,16 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
 		regs->pmr_save = GIC_PRIO_IRQON;
 }
 
+static inline void set_ssbs_bit(struct pt_regs *regs)
+{
+	regs->pstate |= PSR_SSBS_BIT;
+}
+
+static inline void set_compat_ssbs_bit(struct pt_regs *regs)
+{
+	regs->pstate |= PSR_AA32_SSBS_BIT;
+}
+
 static inline void start_thread(struct pt_regs *regs, unsigned long pc,
 				unsigned long sp)
 {
@@ -200,7 +210,7 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
 	regs->pstate = PSR_MODE_EL0t;
 
 	if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
-		regs->pstate |= PSR_SSBS_BIT;
+		set_ssbs_bit(regs);
 
 	regs->sp = sp;
 }
@@ -219,7 +229,7 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
 #endif
 
 	if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
-		regs->pstate |= PSR_AA32_SSBS_BIT;
+		set_compat_ssbs_bit(regs);
 
 	regs->compat_sp = sp;
 }
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index df45af931459..4d9b1f48dc39 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -8,19 +8,12 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
+#include <linux/types.h>
 
 #include <asm/memory.h>
 #include <asm/ptrace.h>
 #include <asm/sdei.h>
 
-struct stackframe {
-	unsigned long fp;
-	unsigned long pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	int graph;
-#endif
-};
-
 enum stack_type {
 	STACK_TYPE_UNKNOWN,
 	STACK_TYPE_TASK,
@@ -28,6 +21,7 @@ enum stack_type {
 	STACK_TYPE_OVERFLOW,
 	STACK_TYPE_SDEI_NORMAL,
 	STACK_TYPE_SDEI_CRITICAL,
+	__NR_STACK_TYPES
 };
 
 struct stack_info {
@@ -36,6 +30,37 @@ struct stack_info {
 	enum stack_type type;
 };
 
+/*
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
+ *
+ * @fp:          The fp value in the frame record (or the real fp)
+ * @pc:          The lr value in the frame record (or the real lr)
+ *
+ * @stacks_done: Stacks which have been entirely unwound, and to which it is
+ *               no longer valid to unwind.
+ *
+ * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
+ *               of 0. This is used to ensure that within a stack, each
+ *               subsequent frame record is at an increasing address.
+ * @prev_type:   The type of stack this frame record was on, or a synthetic
+ *               value of STACK_TYPE_UNKNOWN. This is used to detect a
+ *               transition from one stack to another.
+ *
+ * @graph:       When FUNCTION_GRAPH_TRACER is selected, holds the index of a
+ *               replacement lr value in the ftrace graph stack.
+ */
+struct stackframe {
+	unsigned long fp;
+	unsigned long pc;
+	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+	unsigned long prev_fp;
+	enum stack_type prev_type;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	int graph;
+#endif
+};
+
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
 extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
@@ -64,8 +89,9 @@ static inline bool on_irq_stack(unsigned long sp,
 	return true;
 }
 
-static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp,
-				struct stack_info *info)
+static inline bool on_task_stack(const struct task_struct *tsk,
+				 unsigned long sp,
+				 struct stack_info *info)
 {
 	unsigned long low = (unsigned long)task_stack_page(tsk);
 	unsigned long high = low + THREAD_SIZE;
@@ -112,10 +138,13 @@ static inline bool on_overflow_stack(unsigned long sp,
  * We can only safely access per-cpu stacks from current in a non-preemptible
  * context.
  */
-static inline bool on_accessible_stack(struct task_struct *tsk,
-					unsigned long sp,
-					struct stack_info *info)
+static inline bool on_accessible_stack(const struct task_struct *tsk,
+				       unsigned long sp,
+				       struct stack_info *info)
 {
+	if (info)
+		info->type = STACK_TYPE_UNKNOWN;
+
 	if (on_task_stack(tsk, sp, info))
 		return true;
 	if (tsk != current || preemptible())
@@ -130,4 +159,27 @@ static inline bool on_accessible_stack(struct task_struct *tsk,
 	return false;
 }
 
+static inline void start_backtrace(struct stackframe *frame,
+				   unsigned long fp, unsigned long pc)
+{
+	frame->fp = fp;
+	frame->pc = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame->graph = 0;
+#endif
+
+	/*
+	 * Prime the first unwind.
+	 *
+	 * In unwind_frame() we'll check that the FP points to a valid stack,
+	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+	 * treated as a transition to whichever stack that happens to be. The
+	 * prev_fp value won't be used, but we set it to 0 such that it is
+	 * definitely not an accessible stack address.
+	 */
+	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+	frame->prev_fp = 0;
+	frame->prev_type = STACK_TYPE_UNKNOWN;
+}
+
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 9cdc4592da3e..320a30dbe35e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -586,10 +586,8 @@ el1_sync:
 	b.eq	el1_ia
 	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
 	b.eq	el1_undef
-	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
-	b.eq	el1_sp_pc
 	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
-	b.eq	el1_sp_pc
+	b.eq	el1_pc
 	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL1
 	b.eq	el1_undef
 	cmp	x24, #ESR_ELx_EC_BREAKPT_CUR	// debug exception in EL1
@@ -611,9 +609,11 @@ el1_da:
 	bl	do_mem_abort
 
 	kernel_exit 1
-el1_sp_pc:
+el1_pc:
 	/*
-	 * Stack or PC alignment exception handling
+	 * PC alignment exception handling. We don't handle SP alignment faults,
+	 * since we will have hit a recursive exception when trying to push the
+	 * initial pt_regs.
 	 */
 	mrs	x0, far_el1
 	inherit_daif	pstate=x23, tmp=x2
@@ -732,9 +732,9 @@ el0_sync:
 	ccmp	x24, #ESR_ELx_EC_WFx, #4, ne
 	b.eq	el0_sys
 	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
-	b.eq	el0_sp_pc
+	b.eq	el0_sp
 	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
-	b.eq	el0_sp_pc
+	b.eq	el0_pc
 	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
 	b.eq	el0_undef
 	cmp	x24, #ESR_ELx_EC_BREAKPT_LOW	// debug exception in EL0
@@ -758,7 +758,7 @@ el0_sync_compat:
 	cmp	x24, #ESR_ELx_EC_FP_EXC32	// FP/ASIMD exception
 	b.eq	el0_fpsimd_exc
 	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
-	b.eq	el0_sp_pc
+	b.eq	el0_pc
 	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
 	b.eq	el0_undef
 	cmp	x24, #ESR_ELx_EC_CP15_32	// CP15 MRC/MCR trap
@@ -858,11 +858,15 @@ el0_fpsimd_exc:
 	mov	x1, sp
 	bl	do_fpsimd_exc
 	b	ret_to_user
+el0_sp:
+	ldr	x26, [sp, #S_SP]
+	b	el0_sp_pc
+el0_pc:
+	mrs	x26, far_el1
 el0_sp_pc:
 	/*
 	 * Stack or PC alignment exception handling
 	 */
-	mrs	x26, far_el1
 	gic_prio_kentry_setup tmp=x0
 	enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index eec4776ae5f0..37d3912cfe06 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -406,6 +406,18 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x)
 
 #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
 
+static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
+			    unsigned int vq)
+{
+	unsigned int i;
+	__uint128_t *p;
+
+	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
+		p = (__uint128_t *)ZREG(sst, vq, i);
+		*p = arm64_cpu_to_le128(fst->vregs[i]);
+	}
+}
+
 /*
  * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
  * task->thread.sve_state.
@@ -423,17 +435,12 @@ static void fpsimd_to_sve(struct task_struct *task)
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
-	unsigned int i;
-	__uint128_t *p;
 
 	if (!system_supports_sve())
 		return;
 
 	vq = sve_vq_from_vl(task->thread.sve_vl);
-	for (i = 0; i < 32; ++i) {
-		p = (__uint128_t *)ZREG(sst, vq, i);
-		*p = arm64_cpu_to_le128(fst->vregs[i]);
-	}
+	__fpsimd_to_sve(sst, fst, vq);
 }
 
 /*
@@ -459,7 +466,7 @@ static void sve_to_fpsimd(struct task_struct *task)
 		return;
 
 	vq = sve_vq_from_vl(task->thread.sve_vl);
-	for (i = 0; i < 32; ++i) {
+	for (i = 0; i < SVE_NUM_ZREGS; ++i) {
 		p = (__uint128_t const *)ZREG(sst, vq, i);
 		fst->vregs[i] = arm64_le128_to_cpu(*p);
 	}
@@ -550,8 +557,6 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 	unsigned int vq;
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
-	unsigned int i;
-	__uint128_t *p;
 
 	if (!test_tsk_thread_flag(task, TIF_SVE))
 		return;
@@ -559,11 +564,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 	vq = sve_vq_from_vl(task->thread.sve_vl);
 
 	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
-
-	for (i = 0; i < 32; ++i) {
-		p = (__uint128_t *)ZREG(sst, vq, i);
-		*p = arm64_cpu_to_le128(fst->vregs[i]);
-	}
+	__fpsimd_to_sve(sst, fst, vq);
 }
 
 int sve_set_vector_length(struct task_struct *task,
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 9d63514b9836..b0e03e052dd1 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -154,12 +154,7 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 		return;
 	}
 
-	frame.fp = regs->regs[29];
-	frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
-
+	start_backtrace(&frame, regs->regs[29], regs->pc);
 	walk_stackframe(current, &frame, callchain_trace, entry);
 }
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6a869d9f304f..f674f28df663 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -398,7 +398,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 			childregs->pstate |= PSR_UAO_BIT;
 
 		if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
-			childregs->pstate |= PSR_SSBS_BIT;
+			set_ssbs_bit(childregs);
 
 		if (system_uses_irq_prio_masking())
 			childregs->pmr_save = GIC_PRIO_IRQON;
@@ -443,6 +443,32 @@ void uao_thread_switch(struct task_struct *next)
 }
 
 /*
+ * Force SSBS state on context-switch, since it may be lost after migrating
+ * from a CPU which treats the bit as RES0 in a heterogeneous system.
+ */
+static void ssbs_thread_switch(struct task_struct *next)
+{
+	struct pt_regs *regs = task_pt_regs(next);
+
+	/*
+	 * Nothing to do for kernel threads, but 'regs' may be junk
+	 * (e.g. idle task) so check the flags and bail early.
+	 */
+	if (unlikely(next->flags & PF_KTHREAD))
+		return;
+
+	/* If the mitigation is enabled, then we leave SSBS clear. */
+	if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
+	    test_tsk_thread_flag(next, TIF_SSBD))
+		return;
+
+	if (compat_user_mode(regs))
+		set_compat_ssbs_bit(regs);
+	else if (user_mode(regs))
+		set_ssbs_bit(regs);
+}
+
+/*
  * We store our current task in sp_el0, which is clobbered by userspace. Keep a
  * shadow copy so that we can restore this upon entry from userspace.
  *
@@ -471,6 +497,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	entry_task_switch(next);
 	uao_thread_switch(next);
 	ptrauth_thread_switch(next);
+	ssbs_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -498,11 +525,8 @@ unsigned long get_wchan(struct task_struct *p)
 	if (!stack_page)
 		return 0;
 
-	frame.fp = thread_saved_fp(p);
-	frame.pc = thread_saved_pc(p);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
+	start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
+
 	do {
 		if (unwind_frame(p, &frame))
 			goto out;
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index b21cba90f82d..c4ae647d2306 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -38,12 +38,9 @@ void *return_address(unsigned int level)
 	data.level = level + 2;
 	data.addr = NULL;
 
-	frame.fp = (unsigned long)__builtin_frame_address(0);
-	frame.pc = (unsigned long)return_address; /* dummy */
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
-
+	start_backtrace(&frame,
+			(unsigned long)__builtin_frame_address(0),
+			(unsigned long)return_address);
 	walk_stackframe(current, &frame, save_return_addr, &data);
 
 	if (!data.level)
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 62d395151abe..2b160ae594eb 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -29,9 +29,18 @@
  *	ldp	x29, x30, [sp]
  *	add	sp, sp, #0x10
  */
+
+/*
+ * Unwind from one frame record (A) to the next frame record (B).
+ *
+ * We terminate early if the location of B indicates a malformed chain of frame
+ * records (e.g. a cycle), determined based on the location and fp value of A
+ * and the location (but not the fp value) of B.
+ */
 int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
 	unsigned long fp = frame->fp;
+	struct stack_info info;
 
 	if (fp & 0xf)
 		return -EINVAL;
@@ -39,11 +48,40 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (!tsk)
 		tsk = current;
 
-	if (!on_accessible_stack(tsk, fp, NULL))
+	if (!on_accessible_stack(tsk, fp, &info))
+		return -EINVAL;
+
+	if (test_bit(info.type, frame->stacks_done))
 		return -EINVAL;
 
+	/*
+	 * As stacks grow downward, any valid record on the same stack must be
+	 * at a strictly higher address than the prior record.
+	 *
+	 * Stacks can nest in several valid orders, e.g.
+	 *
+	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+	 *
+	 * ... but the nesting itself is strict. Once we transition from one
+	 * stack to another, it's never valid to unwind back to that first
+	 * stack.
+	 */
+	if (info.type == frame->prev_type) {
+		if (fp <= frame->prev_fp)
+			return -EINVAL;
+	} else {
+		set_bit(frame->prev_type, frame->stacks_done);
+	}
+
+	/*
+	 * Record this frame record's values and location. The prev_fp and
+	 * prev_type are only meaningful to the next unwind_frame() invocation.
+	 */
 	frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
 	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+	frame->prev_fp = fp;
+	frame->prev_type = info.type;
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	if (tsk->ret_stack &&
@@ -122,12 +160,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 	data.skip = trace->skip;
 	data.no_sched_functions = 0;
 
-	frame.fp = regs->regs[29];
-	frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
-
+	start_backtrace(&frame, regs->regs[29], regs->pc);
 	walk_stackframe(current, &frame, save_trace, &data);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_regs);
@@ -146,17 +179,15 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
 	data.no_sched_functions = nosched;
 
 	if (tsk != current) {
-		frame.fp = thread_saved_fp(tsk);
-		frame.pc = thread_saved_pc(tsk);
+		start_backtrace(&frame, thread_saved_fp(tsk),
+				thread_saved_pc(tsk));
 	} else {
 		/* We don't want this function nor the caller */
 		data.skip += 2;
-		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.pc = (unsigned long)__save_stack_trace;
+		start_backtrace(&frame,
+				(unsigned long)__builtin_frame_address(0),
+				(unsigned long)__save_stack_trace);
 	}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
 
 	walk_stackframe(tsk, &frame, save_trace, &data);
 
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 9f25aedeac9d..0b2946414dc9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -38,11 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
 	if (!in_lock_functions(regs->pc))
 		return regs->pc;
 
-	frame.fp = regs->regs[29];
-	frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
+	start_backtrace(&frame, regs->regs[29], regs->pc);
+
 	do {
 		int ret = unwind_frame(NULL, &frame);
 		if (ret < 0)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8c03456dade6..d3313797cca9 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -100,18 +100,17 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		return;
 
 	if (tsk == current) {
-		frame.fp = (unsigned long)__builtin_frame_address(0);
-		frame.pc = (unsigned long)dump_backtrace;
+		start_backtrace(&frame,
+				(unsigned long)__builtin_frame_address(0),
+				(unsigned long)dump_backtrace);
 	} else {
 		/*
 		 * task blocked in __switch_to
 		 */
-		frame.fp = thread_saved_fp(tsk);
-		frame.pc = thread_saved_pc(tsk);
+		start_backtrace(&frame,
+				thread_saved_fp(tsk),
+				thread_saved_pc(tsk));
 	}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame.graph = 0;
-#endif
 
 	printk("Call trace:\n");
 	do {
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 4ab863045188..dd2514bb1511 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -32,10 +32,10 @@ UBSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 KCOV_INSTRUMENT			:= n
 
-ifeq ($(c-gettimeofday-y),)
 CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
-else
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+
+ifneq ($(c-gettimeofday-y),)
+  CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
 endif
 
 # Clang versions less than 8 do not support -mcmodel=tiny
@@ -57,8 +57,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
 
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
-	$(call if_changed,ld)
-	$(call if_changed,vdso_check)
+	$(call if_changed,vdsold_and_vdso_check)
 
 # Strip rule for the .so file
 $(obj)/%.so: OBJCOPYFLAGS := -S
@@ -74,8 +73,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
 	$(call if_changed,vdsosym)
 
 # Actual build commands
-quiet_cmd_vdsocc = VDSOCC   $@
-      cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
+quiet_cmd_vdsold_and_vdso_check = LD      $@
+      cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
 
 # Install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 288c14d30b45..1fba0776ed40 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -96,8 +96,8 @@ VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
 VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
 VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
-VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
-VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += -Wl,--hash-style=sysv
+VDSO_LDFLAGS += -Wl,--build-id
 VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
 
 
@@ -144,8 +144,7 @@ $(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
 
 # Link rule for the .so file, .lds has to be first
 $(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
-	$(call if_changed,vdsold)
-	$(call if_changed,vdso_check)
+	$(call if_changed,vdsold_and_vdso_check)
 
 # Compilation rules for the vDSO sources
 $(c-obj-vdso): %.o: %.c FORCE
@@ -156,14 +155,17 @@ $(asm-obj-vdso): %.o: %.S FORCE
 	$(call if_changed_dep,vdsoas)
 
 # Actual build commands
-quiet_cmd_vdsold = VDSOL   $@
+quiet_cmd_vdsold_and_vdso_check = LD32    $@
+      cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
+
+quiet_cmd_vdsold = LD32    $@
       cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
                    -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
-quiet_cmd_vdsocc = VDSOC   $@
+quiet_cmd_vdsocc = CC32    $@
       cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
-quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD   $@
+quiet_cmd_vdsocc_gettimeofday = CC32    $@
       cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
-quiet_cmd_vdsoas = VDSOA   $@
+quiet_cmd_vdsoas = AS32    $@
       cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
 
 quiet_cmd_vdsomunge = MUNGE   $@
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 3c9e1bd9a3e9..d6544dc71258 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
 #include <asm/mem-layout.h>
 #include <asm/atomic.h>
 
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 #define check_pgt_cache() do {} while (0)
 
 extern unsigned long long kmap_generation;
@@ -46,38 +48,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long) pgd);
 }
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-	struct page *pte;
-
-	pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (!pte)
-		return NULL;
-	if (!pgtable_page_ctor(pte)) {
-		__free_page(pte);
-		return NULL;
-	}
-	return pte;
-}
-
-/* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
-	return (pte_t *) __get_free_page(flags);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-	pgtable_page_dtor(pte);
-	__free_page(pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-	free_page((unsigned long)pte);
-}
-
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t pte)
 {
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
index e09cf2deeafe..904034da4974 100644
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -50,6 +50,10 @@ struct kprobe_ctlblk {
 
 int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs);
 int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs);
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	return 0;
+}
 
 #endif /* CONFIG_KPROBES */
 #endif /* _PARISC_KPROBES_H */
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index ba67893a1d72..df46b0e5a915 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -63,7 +63,7 @@ ENTRY_CFI(flush_tlb_all_local)
 
 	/* Flush Instruction Tlb */
 
-	LDREG		ITLB_SID_BASE(%r1), %r20
+88:	LDREG		ITLB_SID_BASE(%r1), %r20
 	LDREG		ITLB_SID_STRIDE(%r1), %r21
 	LDREG		ITLB_SID_COUNT(%r1), %r22
 	LDREG		ITLB_OFF_BASE(%r1), %arg0
@@ -103,6 +103,7 @@ fitonemiddle:					/* Loop if LOOP = 1 */
 	add		%r21, %r20, %r20		/* increment space */
 
 fitdone:
+	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
 
 	/* Flush Data Tlb */
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d8dcd8820369..77f6ebf97113 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -121,6 +121,7 @@ config PPC
 	select ARCH_32BIT_OFF_T if PPC32
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
+	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..11112023e327 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -302,9 +302,14 @@
 #define H_SCM_UNBIND_MEM        0x3F0
 #define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
 #define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
-#define H_SCM_MEM_QUERY	        0x3FC
-#define H_SCM_BLOCK_CLEAR       0x400
-#define MAX_HCALL_OPCODE	H_SCM_BLOCK_CLEAR
+#define H_SCM_UNBIND_ALL        0x3FC
+#define H_SCM_HEALTH            0x400
+#define H_SCM_PERFORMANCE_STATS 0x418
+#define MAX_HCALL_OPCODE	H_SCM_PERFORMANCE_STATS
+
+/* Scope args for H_SCM_UNBIND_ALL */
+#define H_UNBIND_SCOPE_ALL (0x1)
+#define H_UNBIND_SCOPE_DRC (0x2)
 
 /* H_VIOCTL functions */
 #define H_GET_VIOA_DUMP_SIZE	0x01
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index dc9a1ca70edf..c6bbe9778d3c 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -27,11 +27,10 @@ static inline void ppc_set_pmu_inuse(int inuse)
 #ifdef CONFIG_PPC_PSERIES
 		get_lppaca()->pmcregs_in_use = inuse;
 #endif
-	} else {
+	}
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-		get_paca()->pmcregs_in_use = inuse;
+	get_paca()->pmcregs_in_use = inuse;
 #endif
-	}
 #endif
 }
 
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 01555c6ae0f5..be48c2215fa2 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -31,7 +31,7 @@
  * Struct fields are always 32 or 64 bit aligned, depending on them being 32
  * or 64 bit wide respectively.
  *
- * See Documentation/virtual/kvm/ppc-pv.txt
+ * See Documentation/virt/kvm/ppc-pv.txt
  */
 struct kvm_vcpu_arch_shared {
 	__u64 scratch1;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 56dfa7a2a6f2..ea0c69236789 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,7 +49,8 @@ obj-y				:= cputable.o ptrace.o syscalls.o \
 				   signal.o sysfs.o cacheinfo.o time.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
-				   of_platform.o prom_parse.o
+				   of_platform.o prom_parse.o \
+				   dma-common.o
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c
new file mode 100644
index 000000000000..dc7ef6b17b69
--- /dev/null
+++ b/arch/powerpc/kernel/dma-common.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Contains common dma routines for all powerpc platforms.
+ *
+ * Copyright (C) 2019 Shawn Anastasio.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-noncoherent.h>
+
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+		unsigned long attrs)
+{
+	if (!dev_is_dma_coherent(dev))
+		return pgprot_noncached(prot);
+	return prot;
+}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eee5bef736c8..6ba3cc2ef8ab 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1531,7 +1531,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  *
  * Call convention:
  *
- * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
  *
  * For hypercalls, the register convention is as follows:
  * r0 volatile
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index f50b708d6d77..98600b276f76 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1198,6 +1198,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
 			goto bad;
 
 		if (MSR_TM_ACTIVE(msr_hi<<32)) {
+			/* Trying to start TM on non TM system */
+			if (!cpu_has_feature(CPU_FTR_TM))
+				goto bad;
 			/* We only recheckpoint on return if we're
 			 * transaction.
 			 */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 2f80e270c7b0..117515564ec7 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -771,6 +771,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
 	if (MSR_TM_ACTIVE(msr)) {
 		/* We recheckpoint on return. */
 		struct ucontext __user *uc_transact;
+
+		/* Trying to start TM on non TM system */
+		if (!cpu_has_feature(CPU_FTR_TM))
+			goto badframe;
+
 		if (__get_user(uc_transact, &uc->uc_link))
 			goto badframe;
 		if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ec1804f822af..cde3f5a4b3e4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3569,9 +3569,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
 
 	if (kvmhv_on_pseries()) {
+		/*
+		 * We need to save and restore the guest visible part of the
+		 * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+		 * doesn't do this for us. This is only required on pseries;
+		 * otherwise kvmhv_load_hv_regs_and_go() below does it.
+		 */
+		unsigned long host_psscr;
 		/* call our hypervisor to load up HV regs and go */
 		struct hv_guest_state hvregs;
 
+		host_psscr = mfspr(SPRN_PSSCR_PR);
+		mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
 		kvmhv_save_hv_regs(vcpu, &hvregs);
 		hvregs.lpcr = lpcr;
 		vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
@@ -3590,6 +3599,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
 		vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
 		vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+		vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+		mtspr(SPRN_PSSCR_PR, host_psscr);
 
 		/* H_CEDE has to be handled now, not later */
 		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
@@ -3654,6 +3665,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		vcpu->arch.vpa.dirty = 1;
 		save_pmu = lp->pmcregs_in_use;
 	}
+	/* Must save pmu if this guest is capable of running nested guests */
+	save_pmu |= nesting_enabled(vcpu->kvm);
 
 	kvmhv_save_guest_pmu(vcpu, save_pmu);
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6ca0d7376a9f..e3ba67095895 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1986,10 +1986,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 
 	xive->single_escalation = xive_native_has_single_escalation();
 
-	if (ret) {
-		kfree(xive);
+	if (ret)
 		return ret;
-	}
 
 	return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 5596c8ec221a..a998823f68a3 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -1090,9 +1090,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
 	xive->ops = &kvmppc_xive_native_ops;
 
 	if (ret)
-		kfree(xive);
+		return ret;
 
-	return ret;
+	return 0;
 }
 
 /*
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9a5963e07a82..b8ad14bb1170 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1899,11 +1899,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	 *
 	 * For guests on platforms before POWER9, we clamp the it limit to 1G
 	 * to avoid some funky things such as RTAS bugs etc...
+	 *
+	 * On POWER9 we limit to 1TB in case the host erroneously told us that
+	 * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+	 * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+	 * for virtual real mode addressing and so it doesn't make sense to
+	 * have an area larger than 1TB as it can't be addressed.
 	 */
 	if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
 		ppc64_rma_size = first_memblock_size;
 		if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
 			ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
+		else
+			ppc64_rma_size = min_t(u64, ppc64_rma_size,
+					       1UL << SID_SHIFT_1T);
 
 		/* Finally limit subsequent allocations */
 		memblock_set_current_limit(ppc64_rma_size);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9259337d7374..9191a66b3bc5 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -239,7 +239,7 @@ void __init paging_init(void)
 
 #ifdef CONFIG_ZONE_DMA
 	max_zone_pfns[ZONE_DMA]	= min(max_low_pfn,
-			((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
+				      1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
 #endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index c8ec670ee924..2c07908359b2 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/libnvdimm.h>
 #include <linux/platform_device.h>
+#include <linux/delay.h>
 
 #include <asm/plpar_wrappers.h>
 
@@ -43,8 +44,9 @@ struct papr_scm_priv {
 static int drc_pmem_bind(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
-	uint64_t rc, token;
 	uint64_t saved = 0;
+	uint64_t token;
+	int64_t rc;
 
 	/*
 	 * When the hypervisor cannot map all the requested memory in a single
@@ -64,6 +66,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
 	} while (rc == H_BUSY);
 
 	if (rc) {
+		/* H_OVERLAP needs a separate error path */
+		if (rc == H_OVERLAP)
+			return -EBUSY;
+
 		dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
 		return -ENXIO;
 	}
@@ -78,22 +84,36 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
 static int drc_pmem_unbind(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
-	uint64_t rc, token;
+	uint64_t token = 0;
+	int64_t rc;
 
-	token = 0;
+	dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
 
-	/* NB: unbind has the same retry requirements mentioned above */
+	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
 	do {
-		rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
-				p->bound_addr, p->blocks, token);
+
+		/* Unbind of all SCM resources associated with drcIndex */
+		rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
+				 p->drc_index, token);
 		token = ret[0];
-		cond_resched();
+
+		/* Check if we are stalled for some time */
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			rc = H_BUSY;
+		} else if (rc == H_BUSY) {
+			cond_resched();
+		}
+
 	} while (rc == H_BUSY);
 
 	if (rc)
 		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+	else
+		dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+			p->drc_index);
 
-	return !!rc;
+	return rc == H_SUCCESS ? 0 : -ENXIO;
 }
 
 static int papr_scm_meta_get(struct papr_scm_priv *p,
@@ -389,6 +409,14 @@ static int papr_scm_probe(struct platform_device *pdev)
 
 	/* request the hypervisor to bind this region to somewhere in memory */
 	rc = drc_pmem_bind(p);
+
+	/* If phyp says drc memory still bound then force unbind and retry */
+	if (rc == -EBUSY) {
+		dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
+		drc_pmem_unbind(p);
+		rc = drc_pmem_bind(p);
+	}
+
 	if (rc)
 		goto err;
 
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 082c7e1c20f0..1cdb39575eae 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -479,7 +479,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
 	 * Now go through the entire mask until we find a valid
 	 * target.
 	 */
-	for (;;) {
+	do {
 		/*
 		 * We re-check online as the fallback case passes us
 		 * an untested affinity mask
@@ -487,12 +487,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
 		if (cpu_online(cpu) && xive_try_pick_target(cpu))
 			return cpu;
 		cpu = cpumask_next(cpu, mask);
-		if (cpu == first)
-			break;
 		/* Wrap around */
 		if (cpu >= nr_cpu_ids)
 			cpu = cpumask_first(mask);
-	}
+	} while (cpu != first);
+
 	return -1;
 }
 
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f8b3b07e4247..7a117be8297c 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -34,8 +34,6 @@ else
 	KBUILD_LDFLAGS += -melf32lriscv
 endif
 
-KBUILD_CFLAGS += -Wall
-
 # ISA string setting
 riscv-march-$(CONFIG_ARCH_RV32I)	:= rv32ima
 riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 40983491b95f..9bf63f0ab253 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -217,5 +217,20 @@
 			#size-cells = <0>;
 			status = "disabled";
 		};
+		eth0: ethernet@10090000 {
+			compatible = "sifive,fu540-c000-gem";
+			interrupt-parent = <&plic0>;
+			interrupts = <53>;
+			reg = <0x0 0x10090000 0x0 0x2000
+			       0x0 0x100a0000 0x0 0x1000>;
+			local-mac-address = [00 00 00 00 00 00];
+			clock-names = "pclk", "hclk";
+			clocks = <&prci PRCI_CLK_GEMGXLPLL>,
+				 <&prci PRCI_CLK_GEMGXLPLL>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
 	};
 };
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 0b55c53c08c7..93d68cbd64fe 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -76,3 +76,12 @@
 		disable-wp;
 	};
 };
+
+&eth0 {
+	status = "okay";
+	phy-mode = "gmii";
+	phy-handle = <&phy0>;
+	phy0: ethernet-phy@0 {
+		reg = <0>;
+	};
+};
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 1efaeddf1e4b..16970f246860 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mm-arch-hooks.h
+generic-y += msi.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += sections.h
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index 0e2eeeb1fd27..13ce76cc5aff 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -18,6 +18,7 @@
 #ifdef __LP64__
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_SYS_CLONE3
 #endif /* __LP64__ */
 
 #include <asm-generic/unistd.h>
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5d8570ed6cab..a4ad2733eedf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -189,6 +189,7 @@ config S390
 	select VIRT_CPU_ACCOUNTING
 	select ARCH_HAS_SCALED_CPUTIME
 	select HAVE_NMI
+	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select SWIOTLB
 	select GENERIC_ALLOCATOR
 
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9dde4d7d8704..b5fd6e85657c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1224,28 +1224,11 @@ no_timer:
 
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * We cannot move this into the if, as the CPU might be already
-	 * in kvm_vcpu_block without having the waitqueue set (polling)
-	 */
 	vcpu->valid_wakeup = true;
+	kvm_vcpu_wake_up(vcpu);
+
 	/*
-	 * This is mostly to document, that the read in swait_active could
-	 * be moved before other stores, leading to subtle races.
-	 * All current users do not store or use an atomic like update
-	 */
-	smp_mb__after_atomic();
-	if (swait_active(&vcpu->wq)) {
-		/*
-		 * The vcpu gave up the cpu voluntarily, mark it as a good
-		 * yield-candidate.
-		 */
-		vcpu->preempted = true;
-		swake_up_one(&vcpu->wq);
-		vcpu->stat.halt_wakeup++;
-	}
-	/*
-	 * The VCPU might not be sleeping but is executing the VSIE. Let's
+	 * The VCPU might not be sleeping but rather executing VSIE. Let's
 	 * kick it, so it leaves the SIE to process the request.
 	 */
 	kvm_s390_vsie_kick(vcpu);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 4e5bbe328594..20340a03ad90 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -30,7 +30,7 @@
 #include <linux/export.h>
 #include <linux/cma.h>
 #include <linux/gfp.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <asm/processor.h>
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
@@ -161,6 +161,11 @@ bool sev_active(void)
 	return is_prot_virt_guest();
 }
 
+bool force_dma_unencrypted(struct device *dev)
+{
+	return sev_active();
+}
+
 /* protected virtualization */
 static void pv_init(void)
 {
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 5a9e4e1f9f81..324a23947585 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -115,8 +115,7 @@ quiet_cmd_vdso = VDSO    $@
 		       -T $(filter %.lds,$^) $(filter %.o,$^) && \
 		sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@'
 
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
-	$(call ld-option, --build-id) -Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id -Bsymbolic
 GCOV_PROFILE := n
 
 #
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 78772870facd..222855cc0158 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1526,6 +1526,7 @@ config AMD_MEM_ENCRYPT
 	depends on X86_64 && CPU_SUP_AMD
 	select DYNAMIC_PHYSICAL_MASK
 	select ARCH_USE_MEMREMAP_PROT
+	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	---help---
 	  Say yes to enable support for the encryption of system memory.
 	  This requires an AMD processor that supports Secure Memory
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 220d1279d0e2..d6662fdef300 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -384,14 +384,11 @@ struct boot_params *make_boot_params(struct efi_config *c)
 	struct apm_bios_info *bi;
 	struct setup_header *hdr;
 	efi_loaded_image_t *image;
-	void *options, *handle;
+	void *handle;
 	efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
 	int options_size = 0;
 	efi_status_t status;
 	char *cmdline_ptr;
-	u16 *s2;
-	u8 *s1;
-	int i;
 	unsigned long ramdisk_addr;
 	unsigned long ramdisk_size;
 
@@ -494,8 +491,6 @@ static void add_e820ext(struct boot_params *params,
 			struct setup_data *e820ext, u32 nr_entries)
 {
 	struct setup_data *data;
-	efi_status_t status;
-	unsigned long size;
 
 	e820ext->type = SETUP_E820_EXT;
 	e820ext->len  = nr_entries * sizeof(struct boot_e820_entry);
@@ -677,8 +672,6 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
 				   void *priv)
 {
 	const char *signature;
-	__u32 nr_desc;
-	efi_status_t status;
 	struct exit_boot_struct *p = priv;
 
 	signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
@@ -747,7 +740,6 @@ struct boot_params *
 efi_main(struct efi_config *c, struct boot_params *boot_params)
 {
 	struct desc_ptr *gdt = NULL;
-	efi_loaded_image_t *image;
 	struct setup_header *hdr = &boot_params->hdr;
 	efi_status_t status;
 	struct desc_struct *desc;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 24e65a0f756d..53ac0cb2396d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -17,6 +17,7 @@
 #include "pgtable.h"
 #include "../string.h"
 #include "../voffset.h"
+#include <asm/bootparam_utils.h>
 
 /*
  * WARNING!!
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index d2f184165934..c8181392f70d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,6 @@
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
-#include <asm/bootparam_utils.h>
 
 #define BOOT_CTYPE_H
 #include <linux/acpi.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8debf7aeb4c..5f2d03067ae5 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -40,7 +40,6 @@ int cmdline_find_option_bool(const char *option);
 static unsigned long find_trampoline_placement(void)
 {
 	unsigned long bios_start = 0, ebda_start = 0;
-	unsigned long trampoline_start;
 	struct boot_e820_entry *entry;
 	char *signature;
 	int i;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9f1f9e3b8230..830bd984182b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with
 .Lafter_call_\@:
 #endif
 .endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90b473297299..2bb986f305ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,9 +294,11 @@
 .Lfinished_frame_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
 	cld
+.if \skip_gs == 0
 	PUSH_GS
+.endif
 	FIXUP_FRAME
 	pushl	%fs
 	pushl	%es
@@ -313,13 +315,13 @@
 	movl	%edx, %es
 	movl	$(__KERNEL_PERCPU), %edx
 	movl	%edx, %fs
+.if \skip_gs == 0
 	SET_KERNEL_GS %edx
-
+.endif
 	/* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
 	SWITCH_TO_KERNEL_STACK
 .endif
-
 .endm
 
 .macro SAVE_ALL_NMI cr3_reg:req
@@ -1441,39 +1443,46 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 
 ENTRY(page_fault)
 	ASM_CLAC
-	pushl	$do_page_fault
-	ALIGN
-	jmp common_exception
+	pushl	$0; /* %gs's slot on the stack */
+
+	SAVE_ALL switch_stacks=1 skip_gs=1
+
+	ENCODE_FRAME_POINTER
+	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
+	GS_TO_REG %ecx
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
+
+	GET_CR2_INTO(%ecx)			# might clobber %eax
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
+	TRACE_IRQS_OFF
+	movl	%esp, %eax			# pt_regs pointer
+	call	do_page_fault
+	jmp	ret_from_exception
 END(page_fault)
 
 common_exception:
 	/* the function address is in %gs's slot on the stack */
-	FIXUP_FRAME
-	pushl	%fs
-	pushl	%es
-	pushl	%ds
-	pushl	%eax
-	movl	$(__USER_DS), %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	$(__KERNEL_PERCPU), %eax
-	movl	%eax, %fs
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%edx
-	pushl	%ecx
-	pushl	%ebx
-	SWITCH_TO_KERNEL_STACK
+	SAVE_ALL switch_stacks=1 skip_gs=1
 	ENCODE_FRAME_POINTER
-	cld
 	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
 	GS_TO_REG %ecx
 	movl	PT_GS(%esp), %edi		# get the function address
-	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
-	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 35a66fcfcb91..3f5a978a02a7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,18 +864,84 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  */
 #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
 
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+	.if \paranoid
+	call	paranoid_entry
+	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+	.else
+	call	error_entry
+	.endif
+	UNWIND_HINT_REGS
+
+	.if \read_cr2
+	/*
+	 * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
+	 * intermediate storage as RDX can be clobbered in enter_from_user_mode().
+	 * GET_CR2_INTO can clobber RAX.
+	 */
+	GET_CR2_INTO(%r12);
+	.endif
+
+	.if \shift_ist != -1
+	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
+	.else
+	TRACE_IRQS_OFF
+	.endif
+
+	.if \paranoid == 0
+	testb	$3, CS(%rsp)
+	jz	.Lfrom_kernel_no_context_tracking_\@
+	CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
+	.endif
+
+	movq	%rsp, %rdi			/* pt_regs pointer */
+
+	.if \has_error_code
+	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
+	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
+	.else
+	xorl	%esi, %esi			/* no error code */
+	.endif
+
+	.if \shift_ist != -1
+	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	.if \read_cr2
+	movq	%r12, %rdx			/* Move CR2 into 3rd argument */
+	.endif
+
+	call	\do_sym
+
+	.if \shift_ist != -1
+	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
+	.endif
+
+	.if \paranoid
+	/* this procedure expects "no swapgs" flag in ebx */
+	jmp	paranoid_exit
+	.else
+	jmp	error_exit
+	.endif
+
+.endm
+
 /**
  * idtentry - Generate an IDT entry stub
  * @sym:		Name of the generated entry point
- * @do_sym: 		C function to be called
- * @has_error_code: 	True if this IDT vector has an error code on the stack
- * @paranoid: 		non-zero means that this vector may be invoked from
+ * @do_sym:		C function to be called
+ * @has_error_code:	True if this IDT vector has an error code on the stack
+ * @paranoid:		non-zero means that this vector may be invoked from
  *			kernel mode with user GSBASE and/or user CR3.
  *			2 is special -- see below.
  * @shift_ist:		Set to an IST index if entries from kernel mode should
- *             		decrement the IST stack so that nested entries get a
+ *			decrement the IST stack so that nested entries get a
  *			fresh stack.  (This is for #DB, which has a nasty habit
- *             		of recursing.)
+ *			of recursing.)
+ * @create_gap:		create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2:		load CR2 into the 3rd argument; done before calling any C code
  *
  * idtentry generates an IDT stub that sets up a usable kernel context,
  * creates struct pt_regs, and calls @do_sym.  The stub has the following
@@ -900,15 +966,19 @@ apicinterrupt IRQ_WORK_VECTOR			irq_work_interrupt		smp_irq_work_interrupt
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
 ENTRY(\sym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
 	/* Sanity check */
-	.if \shift_ist != -1 && \paranoid == 0
+	.if \shift_ist != -1 && \paranoid != 1
 	.error "using shift_ist requires paranoid=1"
 	.endif
 
+	.if \create_gap && \paranoid
+	.error "using create_gap requires paranoid=0"
+	.endif
+
 	ASM_CLAC
 
 	.if \has_error_code == 0
@@ -934,47 +1004,7 @@ ENTRY(\sym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	.if \paranoid
-	call	paranoid_entry
-	.else
-	call	error_entry
-	.endif
-	UNWIND_HINT_REGS
-	/* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
-	.if \paranoid
-	.if \shift_ist != -1
-	TRACE_IRQS_OFF_DEBUG			/* reload IDT in case of recursion */
-	.else
-	TRACE_IRQS_OFF
-	.endif
-	.endif
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl	%esi, %esi			/* no error code */
-	.endif
-
-	.if \shift_ist != -1
-	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	call	\do_sym
-
-	.if \shift_ist != -1
-	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
-	.endif
-
-	/* these procedures expect "no swapgs" flag in ebx */
-	.if \paranoid
-	jmp	paranoid_exit
-	.else
-	jmp	error_exit
-	.endif
+	idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
 
 	.if \paranoid == 1
 	/*
@@ -983,21 +1013,9 @@ ENTRY(\sym)
 	 * run in real process context if user_mode(regs).
 	 */
 .Lfrom_usermode_switch_stack_\@:
-	call	error_entry
-
-	movq	%rsp, %rdi			/* pt_regs pointer */
-
-	.if \has_error_code
-	movq	ORIG_RAX(%rsp), %rsi		/* get error code */
-	movq	$-1, ORIG_RAX(%rsp)		/* no syscall to restart */
-	.else
-	xorl	%esi, %esi			/* no error code */
+	idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
 	.endif
 
-	call	\do_sym
-
-	jmp	error_exit
-	.endif
 _ASM_NOKPROBE(\sym)
 END(\sym)
 .endm
@@ -1007,7 +1025,7 @@ idtentry overflow			do_overflow			has_error_code=0
 idtentry bounds				do_bounds			has_error_code=0
 idtentry invalid_op			do_invalid_op			has_error_code=0
 idtentry device_not_available		do_device_not_available		has_error_code=0
-idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2
+idtentry double_fault			do_double_fault			has_error_code=1 paranoid=2 read_cr2=1
 idtentry coprocessor_segment_overrun	do_coprocessor_segment_overrun	has_error_code=0
 idtentry invalid_TSS			do_invalid_TSS			has_error_code=1
 idtentry segment_not_present		do_segment_not_present		has_error_code=1
@@ -1179,10 +1197,10 @@ idtentry xendebug		do_debug		has_error_code=0
 #endif
 
 idtentry general_protection	do_general_protection	has_error_code=1
-idtentry page_fault		do_page_fault		has_error_code=1
+idtentry page_fault		do_page_fault		has_error_code=1	read_cr2=1
 
 #ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault	do_async_page_fault	has_error_code=1
+idtentry async_page_fault	do_async_page_fault	has_error_code=1	read_cr2=1
 #endif
 
 #ifdef CONFIG_X86_MCE
@@ -1281,18 +1299,9 @@ ENTRY(error_entry)
 	movq	%rax, %rsp			/* switch stack */
 	ENCODE_FRAME_POINTER
 	pushq	%r12
-
-	/*
-	 * We need to tell lockdep that IRQs are off.  We can't do this until
-	 * we fix gsbase, and we should do it before enter_from_user_mode
-	 * (which can take locks).
-	 */
-	TRACE_IRQS_OFF
-	CALL_enter_from_user_mode
 	ret
 
 .Lerror_entry_done:
-	TRACE_IRQS_OFF
 	ret
 
 	/*
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cfdca8b42c70..cc20465b2867 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -12,9 +12,7 @@
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0
-	.globl \name
-	.type \name, @function
-\name:
+	ENTRY(\name)
 	pushq %rbp
 	movq %rsp, %rbp
 
@@ -35,6 +33,7 @@
 
 	call \func
 	jmp  .L_restore
+	ENDPROC(\name)
 	_ASM_NOKPROBE(\name)
 	.endm
 
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 34773395139a..8df549138193 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -176,9 +176,8 @@ quiet_cmd_vdso = VDSO    $@
 		       -T $(filter %.lds,$^) $(filter %.o,$^) && \
 		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
 
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
-	$(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
-	-Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id \
+	$(call ld-option, --eh-frame-hdr) -Bsymbolic
 GCOV_PROFILE := n
 
 quiet_cmd_vdso_and_check = VDSO    $@
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0e033ef11a9f..0d258688c8cf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -60,8 +60,17 @@ static int hv_cpu_init(unsigned int cpu)
 	if (!hv_vp_assist_page)
 		return 0;
 
-	if (!*hvp)
-		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+	/*
+	 * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
+	 * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
+	 * we always write the EOI MSR in hv_apic_eoi_write() *after* the
+	 * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
+	 * not be stopped in the case of CPU offlining and the VM will hang.
+	 */
+	if (!*hvp) {
+		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
+				 PAGE_KERNEL);
+	}
 
 	if (*hvp) {
 		u64 val;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 050e5f9ebf81..e647aa095867 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -49,7 +49,7 @@ static inline void generic_apic_probe(void)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
 extern int local_apic_timer_c2_ok;
 
 extern int disable_apic;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0cc5b611a113..7b0a4ee77313 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -607,15 +607,16 @@ struct kvm_vcpu_arch {
 
 	/*
 	 * QEMU userspace and the guest each have their own FPU state.
-	 * In vcpu_run, we switch between the user, maintained in the
-	 * task_struct struct, and guest FPU contexts. While running a VCPU,
-	 * the VCPU thread will have the guest FPU context.
+	 * In vcpu_run, we switch between the user and guest FPU contexts.
+	 * While running a VCPU, the VCPU thread will have the guest FPU
+	 * context.
 	 *
 	 * Note that while the PKRU state lives inside the fpu registers,
 	 * it is switched out separately at VMENTER and VMEXIT time. The
 	 * "guest_fpu" state here contains the guest FPU context, with the
 	 * host PRKU bits.
 	 */
+	struct fpu *user_fpu;
 	struct fpu *guest_fpu;
 
 	u64 xcr0;
@@ -1496,25 +1497,29 @@ enum {
 #define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
 #define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
 
+asmlinkage void __noreturn kvm_spurious_fault(void);
+
 /*
  * Hardware virtualization extension instructions may fault if a
  * reboot turns off virtualization while processes are running.
- * Trap the fault and ignore the instruction if that happens.
+ * Usually after catching the fault we just panic; during reboot,
+ * the faulting instruction is ignored instead.
  */
-asmlinkage void kvm_spurious_fault(void);
-
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)	\
-	"666: " insn "\n\t" \
-	"668: \n\t"                           \
-	".pushsection .fixup, \"ax\" \n" \
-	"667: \n\t" \
-	cleanup_insn "\n\t"		      \
-	"cmpb $0, kvm_rebooting \n\t"	      \
-	"jne 668b \n\t"      		      \
-	__ASM_SIZE(push) " $666b \n\t"	      \
-	"jmp kvm_spurious_fault \n\t"	      \
-	".popsection \n\t" \
-	_ASM_EXTABLE(666b, 667b)
+#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn)		\
+	"666: \n\t"							\
+	insn "\n\t"							\
+	"jmp	668f \n\t"						\
+	"667: \n\t"							\
+	"call	kvm_spurious_fault \n\t"				\
+	"668: \n\t"							\
+	".pushsection .fixup, \"ax\" \n\t"				\
+	"700: \n\t"							\
+	cleanup_insn "\n\t"						\
+	"cmpb	$0, kvm_rebooting\n\t"					\
+	"je	667b \n\t"						\
+	"jmp	668b \n\t"						\
+	".popsection \n\t"						\
+	_ASM_EXTABLE(666b, 700b)
 
 #define __kvm_handle_fault_on_reboot(insn)		\
 	____kvm_handle_fault_on_reboot(insn, "")
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..9b4df6eaa11a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 void __init kvm_spinlock_init(void);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25c38a05c1c..dce26f1d13e1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x)
 
 static inline unsigned long read_cr2(void)
 {
-	return PVOP_CALL0(unsigned long, mmu.read_cr2);
+	return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
 }
 
 static inline void write_cr2(unsigned long x)
@@ -746,6 +746,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
 	    PV_RESTORE_ALL_CALLER_REGS					\
 	    FRAME_END							\
 	    "ret;"							\
+	    ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";"	\
 	    ".popsection")
 
 /* Get a reference to a callee-save function */
@@ -909,13 +910,7 @@ extern void default_banner(void);
 		  ANNOTATE_RETPOLINE_SAFE;				\
 		  call PARA_INDIRECT(pv_ops+PV_CPU_swapgs);		\
 		 )
-#endif
-
-#define GET_CR2_INTO_RAX				\
-	ANNOTATE_RETPOLINE_SAFE;				\
-	call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
 
-#ifdef CONFIG_PARAVIRT_XXL
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64),			\
 		  ANNOTATE_RETPOLINE_SAFE;				\
@@ -929,9 +924,19 @@ extern void default_banner(void);
 		  call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);	    \
 		  PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
 #endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif	/* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX							\
+	PARA_SITE(PARA_PATCH(PV_MMU_read_cr2),				\
+		  ANNOTATE_RETPOLINE_SAFE;				\
+		  call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);		\
+		 )
+
+#endif /* CONFIG_PARAVIRT_XXL */
 
-#endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
 #else  /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 946f8f1f1efc..639b2df445ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
 	void (*exit_mmap)(struct mm_struct *mm);
 
 #ifdef CONFIG_PARAVIRT_XXL
-	unsigned long (*read_cr2)(void);
+	struct paravirt_callee_save read_cr2;
 	void (*write_cr2)(unsigned long);
 
 	unsigned long (*read_cr3)(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index f2bd284abc16..b25e633033c3 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
 dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
 #ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
 struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
 void __init trap_init(void);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
 dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
 dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
 dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index e901b0ab116f..503d3f42da16 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -435,9 +435,12 @@ struct kvm_nested_state {
 
 /* for KVM_CAP_PMU_EVENT_FILTER */
 struct kvm_pmu_event_filter {
-       __u32 action;
-       __u32 nevents;
-       __u64 events[0];
+	__u32 action;
+	__u32 nevents;
+	__u32 fixed_counter_bitmap;
+	__u32 flags;
+	__u32 pad[4];
+	__u64 events[0];
 };
 
 #define KVM_PMU_EVENT_ALLOW 0
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 1bd91cb7b320..f5291362da1a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 /*
  * Debug level, exported for io_apic.c
  */
-unsigned int apic_verbosity;
+int apic_verbosity;
 
 int pic_mode;
 
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index da64452584b0..5c7ee3df4d0b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ static void __used common(void)
 	BLANK();
 	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
 	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+	OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
 #endif
 
 	BLANK();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e69408bf664b..7da2bcd2b8eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -86,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table,
 			continue;
 		if (entry->addr >= end || entry->addr + entry->size <= start)
 			continue;
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
 
 bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index bcd206c8ac90..a6342c899be5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
 #ifdef CONFIG_PARAVIRT_XXL
 #include <asm/asm-offsets.h>
 #include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
 #else
-#define GET_CR2_INTO(reg) movq %cr2, reg
 #define INTERRUPT_RETURN iretq
 #endif
 
@@ -253,10 +251,10 @@ END(secondary_startup_64)
  * start_secondary() via .Ljump_to_C_code.
  */
 ENTRY(start_cpu0)
-	movq	initial_stack(%rip), %rsp
 	UNWIND_HINT_EMPTY
+	movq	initial_stack(%rip), %rsp
 	jmp	.Ljump_to_C_code
-ENDPROC(start_cpu0)
+END(start_cpu0)
 #endif
 
 	/* Both SMP bootup and ACPI suspend change these variables */
@@ -323,7 +321,7 @@ early_idt_handler_common:
 
 	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
-	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
+	GET_CR2_INTO(%rdi)	/* can clobber %rax if pv */
 	call early_make_pgtable
 	andl %eax,%eax
 	jz 20f			/* All good */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 82caf01b63dd..b7f34fe2171e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
 NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
 
 dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
 	enum ctx_state prev_state;
 
 	switch (kvm_read_and_reset_pf_reason()) {
 	default:
-		do_page_fault(regs, error_code);
+		do_page_fault(regs, error_code, address);
 		break;
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
 		prev_state = exception_enter();
-		kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+		kvm_async_pf_task_wait((u32)address, !user_mode(regs));
 		exception_exit(prev_state);
 		break;
 	case KVM_PV_REASON_PAGE_READY:
 		rcu_irq_enter();
-		kvm_async_pf_task_wake((u32)read_cr2());
+		kvm_async_pf_task_wake((u32)address);
 		rcu_irq_exit();
 		break;
 	}
@@ -838,6 +838,7 @@ asm(
 "cmpb	$0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
 "setne	%al;"
 "ret;"
+".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
 ".popsection");
 
 #endif
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1bfe5c6e6cfe..afac7ccce72f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early)
 			 * local APIC has default address
 			 */
 			mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-			return;
+			goto out;
 		}
 
 		pr_info("Default MP configuration #%d\n", mpf->feature1);
 		construct_default_ISA_mptable(mpf->feature1);
 
 	} else if (mpf->physptr) {
-		if (check_physptr(mpf, early)) {
-			early_memunmap(mpf, sizeof(*mpf));
-			return;
-		}
+		if (check_physptr(mpf, early))
+			goto out;
 	} else
 		BUG();
 
@@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early)
 	/*
 	 * Only use the first configuration found.
 	 */
-
+out:
 	early_memunmap(mpf, sizeof(*mpf));
 }
 
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 98039d7fb998..0aa6256eedd8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
 	.mmu.exit_mmap		= paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
-	.mmu.read_cr2		= native_read_cr2,
+	.mmu.read_cr2		= __PV_IS_CALLEE_SAVE(native_read_cr2),
 	.mmu.write_cr2		= native_write_cr2,
 	.mmu.read_cr3		= __native_read_cr3,
 	.mmu.write_cr3		= native_write_cr3,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 250e4c4ac6d9..af64519b2695 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -143,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 
 void release_thread(struct task_struct *dead_task)
 {
-	if (dead_task->mm) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-		if (dead_task->mm->context.ldt) {
-			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
-				dead_task->comm,
-				dead_task->mm->context.ldt->entries,
-				dead_task->mm->context.ldt->nr_entries);
-			BUG();
-		}
-#endif
-	}
+	WARN_ON(dead_task->mm);
 }
 
 enum which_selector {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 71691a8310e7..0fdbe89d0754 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -369,12 +369,22 @@ static int putreg(struct task_struct *child,
 	case offsetof(struct user_regs_struct,fs_base):
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
-		x86_fsbase_write_task(child, value);
+		/*
+		 * When changing the FS base, use do_arch_prctl_64()
+		 * to set the index to zero and to set the base
+		 * as requested.
+		 */
+		if (child->thread.fsbase != value)
+			return do_arch_prctl_64(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
+		/*
+		 * Exactly the same here as the %fs handling above.
+		 */
 		if (value >= TASK_SIZE_MAX)
 			return -EIO;
-		x86_gsbase_write_task(child, value);
+		if (child->thread.gsbase != value)
+			return do_arch_prctl_64(child, ARCH_SET_GS, value);
 		return 0;
 #endif
 	}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 87095a477154..4bb0f8447112 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
-	unsigned long cr2;
-#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	 * stack even if the actual trigger for the double fault was
 	 * something else.
 	 */
-	cr2 = read_cr2();
 	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
 		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
 #endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ead681210306..22c2720cd948 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -368,9 +368,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
 		F(MD_CLEAR);
 
+	/* cpuid 7.1.eax */
+	const u32 kvm_cpuid_7_1_eax_x86_features =
+		F(AVX512_BF16);
+
 	switch (index) {
 	case 0:
-		entry->eax = 0;
+		entry->eax = min(entry->eax, 1u);
 		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
 		cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
 		/* TSC_ADJUST is emulated */
@@ -394,6 +398,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 		 */
 		entry->edx |= F(ARCH_CAPABILITIES);
 		break;
+	case 1:
+		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+		entry->ebx = 0;
+		entry->ecx = 0;
+		entry->edx = 0;
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		entry->eax = 0;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8e409ad448f9..718f7d9afedc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -312,29 +312,42 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
 
 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 
-#define FOP_FUNC(name) \
+#define __FOP_FUNC(name) \
 	".align " __stringify(FASTOP_SIZE) " \n\t" \
 	".type " name ", @function \n\t" \
 	name ":\n\t"
 
-#define FOP_RET   "ret \n\t"
+#define FOP_FUNC(name) \
+	__FOP_FUNC(#name)
+
+#define __FOP_RET(name) \
+	"ret \n\t" \
+	".size " name ", .-" name "\n\t"
+
+#define FOP_RET(name) \
+	__FOP_RET(#name)
 
 #define FOP_START(op) \
 	extern void em_##op(struct fastop *fake); \
 	asm(".pushsection .text, \"ax\" \n\t" \
 	    ".global em_" #op " \n\t" \
-	    FOP_FUNC("em_" #op)
+	    ".align " __stringify(FASTOP_SIZE) " \n\t" \
+	    "em_" #op ":\n\t"
 
 #define FOP_END \
 	    ".popsection")
 
+#define __FOPNOP(name) \
+	__FOP_FUNC(name) \
+	__FOP_RET(name)
+
 #define FOPNOP() \
-	FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
-	FOP_RET
+	__FOPNOP(__stringify(__UNIQUE_ID(nop)))
 
 #define FOP1E(op,  dst) \
-	FOP_FUNC(#op "_" #dst) \
-	"10: " #op " %" #dst " \n\t" FOP_RET
+	__FOP_FUNC(#op "_" #dst) \
+	"10: " #op " %" #dst " \n\t" \
+	__FOP_RET(#op "_" #dst)
 
 #define FOP1EEX(op,  dst) \
 	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
@@ -366,8 +379,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	FOP_END
 
 #define FOP2E(op,  dst, src)	   \
-	FOP_FUNC(#op "_" #dst "_" #src) \
-	#op " %" #src ", %" #dst " \n\t" FOP_RET
+	__FOP_FUNC(#op "_" #dst "_" #src) \
+	#op " %" #src ", %" #dst " \n\t" \
+	__FOP_RET(#op "_" #dst "_" #src)
 
 #define FASTOP2(op) \
 	FOP_START(op) \
@@ -405,8 +419,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	FOP_END
 
 #define FOP3E(op,  dst, src, src2) \
-	FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
-	#op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
+	__FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
+	#op " %" #src2 ", %" #src ", %" #dst " \n\t"\
+	__FOP_RET(#op "_" #dst "_" #src "_" #src2)
 
 /* 3-operand, word-only, src2=cl */
 #define FASTOP3WCL(op) \
@@ -423,7 +438,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
 	".type " #op ", @function \n\t" \
 	#op ": \n\t" \
 	#op " %al \n\t" \
-	FOP_RET
+	__FOP_RET(#op)
 
 asm(".pushsection .fixup, \"ax\"\n"
     ".global kvm_fastop_exception \n"
@@ -449,7 +464,10 @@ FOP_SETCC(setle)
 FOP_SETCC(setnle)
 FOP_END;
 
-FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
+FOP_START(salc)
+FOP_FUNC(salc)
+"pushf; sbb %al, %al; popf \n\t"
+FOP_RET(salc)
 FOP_END;
 
 /*
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a39e38f13029..c10a8b10b203 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1594,7 +1594,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
 	uint16_t code, rep_idx, rep_cnt;
-	bool fast, longmode, rep;
+	bool fast, rep;
 
 	/*
 	 * hypercall generates UD from non zero cpl and real mode
@@ -1605,9 +1605,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	longmode = is_64_bit_mode(vcpu);
-
-	if (!longmode) {
+#ifdef CONFIG_X86_64
+	if (is_64_bit_mode(vcpu)) {
+		param = kvm_rcx_read(vcpu);
+		ingpa = kvm_rdx_read(vcpu);
+		outgpa = kvm_r8_read(vcpu);
+	} else
+#endif
+	{
 		param = ((u64)kvm_rdx_read(vcpu) << 32) |
 			(kvm_rax_read(vcpu) & 0xffffffff);
 		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
@@ -1615,13 +1620,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
 			(kvm_rsi_read(vcpu) & 0xffffffff);
 	}
-#ifdef CONFIG_X86_64
-	else {
-		param = kvm_rcx_read(vcpu);
-		ingpa = kvm_rdx_read(vcpu);
-		outgpa = kvm_r8_read(vcpu);
-	}
-#endif
 
 	code = param & 0xffff;
 	fast = !!(param & HV_HYPERCALL_FAST_BIT);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1add1bc881e2..d859ae8890d0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -45,11 +45,6 @@
 #include "lapic.h"
 #include "irq.h"
 
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
 static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
 		bool line_status);
 
@@ -294,7 +289,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	default:
 		index = (ioapic->ioregsel - 0x10) >> 1;
 
-		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 			return;
 		e = &ioapic->redirtbl[index];
@@ -343,12 +337,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	    entry->fields.remote_irr))
 		return -1;
 
-	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-		     "vector=%x trig_mode=%x\n",
-		     entry->fields.dest_id, entry->fields.dest_mode,
-		     entry->fields.delivery_mode, entry->fields.vector,
-		     entry->fields.trig_mode);
-
 	irqe.dest_id = entry->fields.dest_id;
 	irqe.vector = entry->fields.vector;
 	irqe.dest_mode = entry->fields.dest_mode;
@@ -515,7 +503,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	if (!ioapic_in_range(ioapic, addr))
 		return -EOPNOTSUPP;
 
-	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 	addr &= 0xff;
@@ -558,8 +545,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	if (!ioapic_in_range(ioapic, addr))
 		return -EOPNOTSUPP;
 
-	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
-		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 	switch (len) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a232e76d8f23..0aa158657f20 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -52,9 +52,6 @@
 #define PRIu64 "u"
 #define PRIo64 "o"
 
-/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...) do {} while (0)
-
 /* 14 is the version for Xeon and Pentium 8.4.8*/
 #define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
 #define LAPIC_MMIO_LENGTH		(1 << 12)
@@ -121,6 +118,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 	return apic->vcpu->vcpu_id;
 }
 
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+}
+
 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 	switch (map->mode) {
@@ -627,7 +635,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 {
 	u8 val;
 	if (pv_eoi_get_user(vcpu, &val) < 0)
-		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 	return val & 0x1;
 }
@@ -635,7 +643,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 {
 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
-		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return;
 	}
@@ -645,7 +653,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 {
 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
-		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return;
 	}
@@ -679,9 +687,6 @@ static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 	else
 		ppr = isrv & 0xf0;
 
-	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
-		   apic, ppr, isr, isrv);
-
 	*new_ppr = ppr;
 	if (old_ppr != ppr)
 		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
@@ -758,8 +763,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 		return ((logical_id >> 4) == (mda >> 4))
 		       && (logical_id & mda & 0xf) != 0;
 	default:
-		apic_debug("Bad DFR vcpu %d: %08x\n",
-			   apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
 		return false;
 	}
 }
@@ -798,10 +801,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	struct kvm_lapic *target = vcpu->arch.apic;
 	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 
-	apic_debug("target %p, source %p, dest 0x%x, "
-		   "dest_mode 0x%x, short_hand 0x%x\n",
-		   target, source, dest, dest_mode, short_hand);
-
 	ASSERT(target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
@@ -816,8 +815,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	case APIC_DEST_ALLBUT:
 		return target != source;
 	default:
-		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
-			   short_hand);
 		return false;
 	}
 }
@@ -1095,15 +1092,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			smp_wmb();
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
-		} else {
-			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
-				   vcpu->vcpu_id);
 		}
 		break;
 
 	case APIC_DM_STARTUP:
-		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
-			   vcpu->vcpu_id, vector);
 		result = 1;
 		apic->sipi_vector = vector;
 		/* make sure sipi_vector is visible for the receiver */
@@ -1221,14 +1213,6 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 	trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
-	apic_debug("icr_high 0x%x, icr_low 0x%x, "
-		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
-		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
-		   "msi_redir_hint 0x%x\n",
-		   icr_high, icr_low, irq.shorthand, irq.dest_id,
-		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
-		   irq.vector, irq.msi_redir_hint);
-
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }
 
@@ -1282,7 +1266,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 
 	switch (offset) {
 	case APIC_ARBPRI:
-		apic_debug("Access APIC ARBPRI register which is for P6\n");
 		break;
 
 	case APIC_TMCCT:	/* Timer CCR */
@@ -1349,11 +1332,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 	if (!apic_x2apic_mode(apic))
 		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
 
-	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
-		apic_debug("KVM_APIC_READ: read reserved register %x\n",
-			   offset);
+	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
 		return 1;
-	}
 
 	result = __apic_read(apic, offset & ~0xf);
 
@@ -1411,9 +1391,6 @@ static void update_divide_count(struct kvm_lapic *apic)
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
 	apic->divide_count = 0x1 << (tmp2 & 0x7);
-
-	apic_debug("timer divide count is 0x%x\n",
-				   apic->divide_count);
 }
 
 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
@@ -1455,29 +1432,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
 	}
 }
 
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
-	struct kvm_vcpu *vcpu = apic->vcpu;
-	struct swait_queue_head *q = &vcpu->wq;
-	struct kvm_timer *ktimer = &apic->lapic_timer;
-
-	if (atomic_read(&apic->lapic_timer.pending))
-		return;
-
-	atomic_inc(&apic->lapic_timer.pending);
-	kvm_set_pending_timer(vcpu);
-
-	/*
-	 * For x86, the atomic_inc() is serialized, thus
-	 * using swait_active() is safe.
-	 */
-	if (swait_active(q))
-		swake_up_one(q);
-
-	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
-		ktimer->expired_tscdeadline = ktimer->tscdeadline;
-}
-
 /*
  * On APICv, this test will cause a busy wait
  * during a higher-priority task.
@@ -1551,7 +1505,7 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 guest_tsc, tsc_deadline;
@@ -1559,9 +1513,6 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (apic->lapic_timer.expired_tscdeadline == 0)
 		return;
 
-	if (!lapic_timer_int_injected(vcpu))
-		return;
-
 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
 	apic->lapic_timer.expired_tscdeadline = 0;
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1573,8 +1524,57 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
 		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	if (lapic_timer_int_injected(vcpu))
+		__kvm_wait_lapic_expire(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
 
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+
+	kvm_apic_local_deliver(apic, APIC_LVTT);
+	if (apic_lvtt_tscdeadline(apic))
+		ktimer->tscdeadline = 0;
+	if (apic_lvtt_oneshot(apic)) {
+		ktimer->tscdeadline = 0;
+		ktimer->target_expiration = 0;
+	}
+}
+
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	struct swait_queue_head *q = &vcpu->wq;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+		ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+		if (apic->lapic_timer.timer_advance_ns)
+			__kvm_wait_lapic_expire(vcpu);
+		kvm_apic_inject_pending_timer_irqs(apic);
+		return;
+	}
+
+	atomic_inc(&apic->lapic_timer.pending);
+	kvm_set_pending_timer(vcpu);
+
+	/*
+	 * For x86, the atomic_inc() is serialized, thus
+	 * using swait_active() is safe.
+	 */
+	if (swait_active(q))
+		swake_up_one(q);
+}
+
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
 	struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
 	} else
 		apic_timer_expired(apic);
 
@@ -1648,16 +1648,6 @@ static bool set_target_expiration(struct kvm_lapic *apic)
 
 	limit_periodic_timer_frequency(apic);
 
-	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
-		   PRIx64 ", "
-		   "timer initial count 0x%x, period %lldns, "
-		   "expire @ 0x%016" PRIx64 ".\n", __func__,
-		   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-		   kvm_lapic_get_reg(apic, APIC_TMICT),
-		   apic->lapic_timer.period,
-		   ktime_to_ns(ktime_add_ns(now,
-				apic->lapic_timer.period)));
-
 	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
 		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
 	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
@@ -1703,7 +1693,7 @@ static void start_sw_period(struct kvm_lapic *apic)
 
 	hrtimer_start(&apic->lapic_timer.timer,
 		apic->lapic_timer.target_expiration,
-		HRTIMER_MODE_ABS_PINNED);
+		HRTIMER_MODE_ABS);
 }
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1860,8 +1850,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
 		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
 		if (lvt0_in_nmi_mode) {
-			apic_debug("Receive NMI setting on APIC_LVT0 "
-				   "for cpu %d\n", apic->vcpu->vcpu_id);
 			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
 		} else
 			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
@@ -1975,8 +1963,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_TDCR: {
 		uint32_t old_divisor = apic->divide_count;
 
-		if (val & 4)
-			apic_debug("KVM_WRITE:TDCR %x\n", val);
 		kvm_lapic_set_reg(apic, APIC_TDCR, val);
 		update_divide_count(apic);
 		if (apic->divide_count != old_divisor &&
@@ -1988,10 +1974,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		break;
 	}
 	case APIC_ESR:
-		if (apic_x2apic_mode(apic) && val != 0) {
-			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+		if (apic_x2apic_mode(apic) && val != 0)
 			ret = 1;
-		}
 		break;
 
 	case APIC_SELF_IPI:
@@ -2004,8 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		ret = 1;
 		break;
 	}
-	if (ret)
-		apic_debug("Local APIC Write to read-only register %x\n", reg);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2033,20 +2016,12 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	 * 32/64/128 bits registers must be accessed thru 32 bits.
 	 * Refer SDM 8.4.1
 	 */
-	if (len != 4 || (offset & 0xf)) {
-		/* Don't shout loud, $infamous_os would cause only noise. */
-		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+	if (len != 4 || (offset & 0xf))
 		return 0;
-	}
 
 	val = *(u32*)data;
 
-	/* too common printing */
-	if (offset != APIC_EOI)
-		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __func__, offset, len, val);
-
-	kvm_lapic_reg_write(apic, offset, val);
+	kvm_lapic_reg_write(apic, offset & 0xff0, val);
 
 	return 0;
 }
@@ -2178,11 +2153,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
 	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
 		pr_warn_once("APIC base relocation is unsupported by KVM");
-
-	/* with FSB delivery interrupt, we can restart APIC functionality */
-	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
-		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
-
 }
 
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2193,8 +2163,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	if (!apic)
 		return;
 
-	apic_debug("%s\n", __func__);
-
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
@@ -2247,11 +2215,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vcpu->arch.apic_arb_prio = 0;
 	vcpu->arch.apic_attention = 0;
-
-	apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
-		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
-		   vcpu, kvm_lapic_get_reg(apic, APIC_ID),
-		   vcpu->arch.apic_base, apic->base_address);
 }
 
 /*
@@ -2323,7 +2286,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	struct kvm_lapic *apic;
 
 	ASSERT(vcpu != NULL);
-	apic_debug("apic_init %d\n", vcpu->vcpu_id);
 
 	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
 	if (!apic)
@@ -2340,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS_PINNED);
+		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2397,13 +2359,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
-		kvm_apic_local_deliver(apic, APIC_LVTT);
-		if (apic_lvtt_tscdeadline(apic))
-			apic->lapic_timer.tscdeadline = 0;
-		if (apic_lvtt_oneshot(apic)) {
-			apic->lapic_timer.tscdeadline = 0;
-			apic->lapic_timer.target_expiration = 0;
-		}
+		kvm_apic_inject_pending_timer_irqs(apic);
 		atomic_set(&apic->lapic_timer.pending, 0);
 	}
 }
@@ -2525,12 +2481,13 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
 	struct hrtimer *timer;
 
-	if (!lapic_in_kernel(vcpu))
+	if (!lapic_in_kernel(vcpu) ||
+		kvm_can_post_timer_interrupt(vcpu))
 		return;
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
 /*
@@ -2678,11 +2635,8 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
 		return 1;
 
-	if (reg == APIC_DFR || reg == APIC_ICR2) {
-		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
-			   reg);
+	if (reg == APIC_DFR || reg == APIC_ICR2)
 		return 1;
-	}
 
 	if (kvm_lapic_reg_read(apic, reg, 4, &low))
 		return 1;
@@ -2780,8 +2734,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 		/* evaluate pending_events before reading the vector */
 		smp_rmb();
 		sipi_vector = apic->sipi_vector;
-		apic_debug("vcpu %d received sipi with vector # %x\n",
-			 vcpu->vcpu_id, sipi_vector);
 		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 36747174e4a8..50053d2b8b7b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -236,6 +236,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
 
 static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
 {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9a5814d8d194..24843cf49579 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3466,7 +3466,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		/*
 		 * Currently, fast page fault only works for direct mapping
 		 * since the gfn is not stable for indirect shadow page. See
-		 * Documentation/virtual/kvm/locking.txt to get more detail.
+		 * Documentation/virt/kvm/locking.txt to get more detail.
 		 */
 		fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
 							iterator.sptep, spte,
@@ -4597,11 +4597,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 		 */
 
 		/* Faults from writes to non-writable pages */
-		u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+		u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
 		/* Faults from user mode accesses to supervisor pages */
-		u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+		u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
 		/* Faults from fetches of non-executable pages*/
-		u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+		u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
 		/* Faults from kernel mode fetches of user pages */
 		u8 smepf = 0;
 		/* Faults from kernel mode accesses of user pages */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index aa5a2597305a..46875bbd0419 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -19,8 +19,8 @@
 #include "lapic.h"
 #include "pmu.h"
 
-/* This keeps the total size of the filter under 4k. */
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63
+/* This is enough to filter the vast majority of currently defined events. */
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
 
 /* NOTE:
  * - Each perf counter is defined as "struct kvm_pmc";
@@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 						 intr ? kvm_perf_overflow_intr :
 						 kvm_perf_overflow, pmc);
 	if (IS_ERR(event)) {
-		printk_once("kvm_pmu: event creation failed %ld\n",
-			    PTR_ERR(event));
+		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
+			    PTR_ERR(event), pmc->idx);
 		return;
 	}
 
@@ -206,12 +206,24 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 {
 	unsigned en_field = ctrl & 0x3;
 	bool pmi = ctrl & 0x8;
+	struct kvm_pmu_event_filter *filter;
+	struct kvm *kvm = pmc->vcpu->kvm;
 
 	pmc_stop_counter(pmc);
 
 	if (!en_field || !pmc_is_enabled(pmc))
 		return;
 
+	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+	if (filter) {
+		if (filter->action == KVM_PMU_EVENT_DENY &&
+		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+			return;
+		if (filter->action == KVM_PMU_EVENT_ALLOW &&
+		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+			return;
+	}
+
 	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
 			      kvm_x86_ops->pmu_ops->find_fixed_event(idx),
 			      !(en_field & 0x2), /* exclude user */
@@ -385,6 +397,9 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	    tmp.action != KVM_PMU_EVENT_DENY)
 		return -EINVAL;
 
+	if (tmp.flags != 0)
+		return -EINVAL;
+
 	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
 		return -E2BIG;
 
@@ -406,8 +421,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	mutex_unlock(&kvm->lock);
 
 	synchronize_srcu_expedited(&kvm->srcu);
- 	r = 0;
+	r = 0;
 cleanup:
 	kfree(filter);
- 	return r;
+	return r;
 }
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 583b9fa656f3..7eafc6907861 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2143,12 +2143,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto out;
 	}
 
+	svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+						     GFP_KERNEL_ACCOUNT);
+	if (!svm->vcpu.arch.user_fpu) {
+		printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+		err = -ENOMEM;
+		goto free_partial_svm;
+	}
+
 	svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
 						     GFP_KERNEL_ACCOUNT);
 	if (!svm->vcpu.arch.guest_fpu) {
 		printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
 		err = -ENOMEM;
-		goto free_partial_svm;
+		goto free_user_fpu;
 	}
 
 	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
@@ -2211,6 +2219,8 @@ uninit:
 	kvm_vcpu_uninit(&svm->vcpu);
 free_svm:
 	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
+free_user_fpu:
+	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
 free_partial_svm:
 	kmem_cache_free(kvm_vcpu_cache, svm);
 out:
@@ -2241,6 +2251,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
 	kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -7128,13 +7139,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 
 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
-	bool is_user, smap;
-
-	is_user = svm_get_cpl(vcpu) == 3;
-	smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+	unsigned long cr4 = kvm_read_cr4(vcpu);
+	bool smep = cr4 & X86_CR4_SMEP;
+	bool smap = cr4 & X86_CR4_SMAP;
+	bool is_user = svm_get_cpl(vcpu) == 3;
 
 	/*
-	 * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+	 * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
+	 *
+	 * Errata:
+	 * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is
+	 * possible that CPU microcode implementing DecodeAssist will fail
+	 * to read bytes of instruction which caused #NPF. In this case,
+	 * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
+	 * return 0 instead of the correct guest instruction bytes.
+	 *
+	 * This happens because CPU microcode reading instruction bytes
+	 * uses a special opcode which attempts to read data using CPL=0
+	 * privileges. The microcode reads CS:RIP and if it hits a SMAP
+	 * fault, it gives up and returns no instruction bytes.
+	 *
+	 * Detection:
+	 * We reach here in case CPU supports DecodeAssist, raised #NPF and
+	 * returned 0 in GuestIntrBytes field of the VMCB.
+	 * First, errata can only be triggered in case vCPU CR4.SMAP=1.
+	 * Second, if vCPU CR4.SMEP=1, errata could only be triggered
+	 * in case vCPU CPL==3 (Because otherwise guest would have triggered
+	 * a SMEP fault instead of #NPF).
+	 * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
+	 * As most guests enable SMAP if they have also enabled SMEP, use the
+	 * above logic to minimize false-positive detections of the errata
+	 * while still preserving semantic correctness in all cases.
+	 *
+	 * Workaround:
+	 * To determine what instruction the guest was executing, the hypervisor
+	 * will have to decode the instruction at the instruction pointer.
 	 *
 	 * In non SEV guest, hypervisor will be able to read the guest
 	 * memory to decode the instruction pointer when insn_len is zero
@@ -7145,11 +7184,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 	 * instruction pointer so we will not able to workaround it. Lets
 	 * print the error and request to kill the guest.
 	 */
-	if (is_user && smap) {
+	if (smap && (!smep || is_user)) {
 		if (!sev_guest(vcpu->kvm))
 			return true;
 
-		pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+		pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
 		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 	}
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bb509c254939..ced9fba32598 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -194,6 +194,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 {
 	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
+	vmx->nested.need_vmcs12_to_shadow_sync = false;
 }
 
 static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
@@ -219,6 +220,8 @@ static void free_nested(struct kvm_vcpu *vcpu)
 	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
+	kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+
 	vmx->nested.vmxon = false;
 	vmx->nested.smm.vmxon = false;
 	free_vpid(vmx->nested.vpid02);
@@ -231,7 +234,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
 		vmx->vmcs01.shadow_vmcs = NULL;
 	}
 	kfree(vmx->nested.cached_vmcs12);
+	vmx->nested.cached_vmcs12 = NULL;
 	kfree(vmx->nested.cached_shadow_vmcs12);
+	vmx->nested.cached_shadow_vmcs12 = NULL;
 	/* Unpin physical memory we referred to in the vmcs02 */
 	if (vmx->nested.apic_access_page) {
 		kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -1341,6 +1346,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	unsigned long val;
 	int i;
 
+	if (WARN_ON(!shadow_vmcs))
+		return;
+
 	preempt_disable();
 
 	vmcs_load(shadow_vmcs);
@@ -1373,6 +1381,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	unsigned long val;
 	int i, q;
 
+	if (WARN_ON(!shadow_vmcs))
+		return;
+
 	vmcs_load(shadow_vmcs);
 
 	for (q = 0; q < ARRAY_SIZE(fields); q++) {
@@ -4194,7 +4205,10 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 		 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
 		 * address when using FS/GS with a non-zero base.
 		 */
-		*ret = s.base + off;
+		if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
+			*ret = s.base + off;
+		else
+			*ret = off;
 
 		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
 		 * non-canonical form. This is the only check on the memory
@@ -4433,7 +4447,6 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
 		/* copy to memory all shadowed fields in case
 		   they were modified */
 		copy_shadow_to_vmcs12(vmx);
-		vmx->nested.need_vmcs12_to_shadow_sync = false;
 		vmx_disable_shadow_vmcs(vmx);
 	}
 	vmx->nested.posted_intr_nv = -1;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 68d231d49c7a..4dea0e0e7e39 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -337,17 +337,22 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc = NULL;
 	int i;
 
 	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
-		struct kvm_pmc *pmc = &pmu->gp_counters[i];
+		pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
 		pmc->counter = pmc->eventsel = 0;
 	}
 
-	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
-		pmc_stop_counter(&pmu->fixed_counters[i]);
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		pmc = &pmu->fixed_counters[i];
+
+		pmc_stop_counter(pmc);
+		pmc->counter = 0;
+	}
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
 		pmu->global_ovf_ctrl = 0;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index d4cb1945b2e3..4010d519eb8c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -54,9 +54,9 @@ ENTRY(vmx_vmenter)
 	ret
 
 3:	cmpb $0, kvm_rebooting
-	jne 4f
-	call kvm_spurious_fault
-4:	ret
+	je 4f
+	ret
+4:	ud2
 
 	.pushsection .fixup, "ax"
 5:	jmp 3b
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 69536553446d..074385c86c09 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5829,6 +5829,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	}
 
 	if (unlikely(vmx->fail)) {
+		dump_vmcs();
 		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		vcpu->run->fail_entry.hardware_entry_failure_reason
 			= vmcs_read32(VM_INSTRUCTION_ERROR);
@@ -6597,6 +6598,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
@@ -6612,12 +6614,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
 
+	vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+			GFP_KERNEL_ACCOUNT);
+	if (!vmx->vcpu.arch.user_fpu) {
+		printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+		err = -ENOMEM;
+		goto free_partial_vcpu;
+	}
+
 	vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
 			GFP_KERNEL_ACCOUNT);
 	if (!vmx->vcpu.arch.guest_fpu) {
 		printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
 		err = -ENOMEM;
-		goto free_partial_vcpu;
+		goto free_user_fpu;
 	}
 
 	vmx->vpid = allocate_vpid();
@@ -6720,6 +6730,8 @@ uninit_vcpu:
 free_vcpu:
 	free_vpid(vmx->vpid);
 	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
+free_user_fpu:
+	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
 free_partial_vcpu:
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);
@@ -7064,7 +7076,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
 	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
 	struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
 
-	if (kvm_mwait_in_guest(vcpu->kvm))
+	if (kvm_mwait_in_guest(vcpu->kvm) ||
+		kvm_can_post_timer_interrupt(vcpu))
 		return -EOPNOTSUPP;
 
 	vmx = to_vmx(vcpu);
@@ -7453,7 +7466,7 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 
 static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
-	return 0;
+	return false;
 }
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a0b74ecd1de..c6d951cbd76c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -51,6 +51,7 @@
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
 #include <linux/sched/stat.h>
+#include <linux/sched/isolation.h>
 #include <linux/mem_encrypt.h>
 
 #include <trace/events/kvm.h>
@@ -153,6 +154,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
 static bool __read_mostly force_emulation_prefix = false;
 module_param(force_emulation_prefix, bool, S_IRUGO);
 
+int __read_mostly pi_inject_timer = -1;
+module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -1456,12 +1460,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
-	 * vcpu_enter_guest.  This function is only called from
-	 * the physical CPU that is running vcpu.
-	 */
 	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	kvm_vcpu_kick(vcpu);
 }
 
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
@@ -1540,9 +1540,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
 
 	*pshift = shift;
 	*pmultiplier = div_frac(scaled64, tps32);
-
-	pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
-		 __func__, base_hz, scaled_hz, shift, *pmultiplier);
 }
 
 #ifdef CONFIG_X86_64
@@ -1785,12 +1782,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
 		if (!kvm_check_tsc_unstable()) {
 			offset = kvm->arch.cur_tsc_offset;
-			pr_debug("kvm: matched tsc offset for %llu\n", data);
 		} else {
 			u64 delta = nsec_to_cycles(vcpu, elapsed);
 			data += delta;
 			offset = kvm_compute_tsc_offset(vcpu, data);
-			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
 		}
 		matched = true;
 		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -1809,8 +1804,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		kvm->arch.cur_tsc_write = data;
 		kvm->arch.cur_tsc_offset = offset;
 		matched = false;
-		pr_debug("kvm: new tsc generation %llu, clock %llu\n",
-			 kvm->arch.cur_tsc_generation, data);
 	}
 
 	/*
@@ -3313,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
 
+	fpregs_assert_state_consistent();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+
 	/* Apply any externally detected TSC adjustments (due to suspend) */
 	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
 		adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -6911,7 +6908,6 @@ static void kvm_timer_init(void)
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
 	}
-	pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
 
 	cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
 			  kvmclock_cpu_online, kvmclock_cpu_down_prep);
@@ -7070,6 +7066,8 @@ int kvm_arch_init(void *opaque)
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
+	if (pi_inject_timer == -1)
+		pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
 
@@ -7208,7 +7206,7 @@ static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
 
 	rcu_read_unlock();
 
-	if (target)
+	if (target && READ_ONCE(target->ready))
 		kvm_vcpu_yield_to(target);
 }
 
@@ -7248,6 +7246,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		break;
 	case KVM_HC_KICK_CPU:
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
+		kvm_sched_yield(vcpu->kvm, a1);
 		ret = 0;
 		break;
 #ifdef CONFIG_X86_64
@@ -7996,9 +7995,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	trace_kvm_entry(vcpu->vcpu_id);
 	guest_enter_irqoff();
 
-	fpregs_assert_state_consistent();
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_return();
+	/* The preempt notifier should have taken care of the FPU already.  */
+	WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
 
 	if (unlikely(vcpu->arch.switch_db_regs)) {
 		set_debugreg(0, 7);
@@ -8276,7 +8274,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
 	fpregs_lock();
 
-	copy_fpregs_to_fpstate(&current->thread.fpu);
+	copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
 	/* PKRU is separately restored in kvm_x86_ops->run.  */
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
 				~XFEATURE_MASK_PKRU);
@@ -8293,7 +8291,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	fpregs_lock();
 
 	copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
-	copy_kernel_to_fpregs(&current->thread.fpu.state);
+	copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
 	fpregs_mark_activate();
 	fpregs_unlock();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e08a12892e8b..6594020c0691 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -301,6 +301,8 @@ extern unsigned int min_timer_period_us;
 
 extern bool enable_vmware_backdoor;
 
+extern int pi_inject_timer;
+
 extern struct static_key kvm_no_apic_vcpu;
 
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 378a1f70ae7d..4fe1601dbc5d 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -239,7 +239,7 @@ copy_user_handle_tail:
 	ret
 
 	_ASM_EXTABLE_UA(1b, 2b)
-ENDPROC(copy_user_handle_tail)
+END(copy_user_handle_tail)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 74fdff968ea3..304f958c27b2 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -115,29 +115,29 @@ ENDPROC(__get_user_8)
 EXPORT_SYMBOL(__get_user_8)
 
 
+bad_get_user_clac:
+	ASM_CLAC
 bad_get_user:
 	xor %edx,%edx
 	mov $(-EFAULT),%_ASM_AX
-	ASM_CLAC
 	ret
-END(bad_get_user)
 
 #ifdef CONFIG_X86_32
+bad_get_user_8_clac:
+	ASM_CLAC
 bad_get_user_8:
 	xor %edx,%edx
 	xor %ecx,%ecx
 	mov $(-EFAULT),%_ASM_AX
-	ASM_CLAC
 	ret
-END(bad_get_user_8)
 #endif
 
-	_ASM_EXTABLE_UA(1b, bad_get_user)
-	_ASM_EXTABLE_UA(2b, bad_get_user)
-	_ASM_EXTABLE_UA(3b, bad_get_user)
+	_ASM_EXTABLE_UA(1b, bad_get_user_clac)
+	_ASM_EXTABLE_UA(2b, bad_get_user_clac)
+	_ASM_EXTABLE_UA(3b, bad_get_user_clac)
 #ifdef CONFIG_X86_64
-	_ASM_EXTABLE_UA(4b, bad_get_user)
+	_ASM_EXTABLE_UA(4b, bad_get_user_clac)
 #else
-	_ASM_EXTABLE_UA(4b, bad_get_user_8)
-	_ASM_EXTABLE_UA(5b, bad_get_user_8)
+	_ASM_EXTABLE_UA(4b, bad_get_user_8_clac)
+	_ASM_EXTABLE_UA(5b, bad_get_user_8_clac)
 #endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index d2e5c9c39601..14bf78341d3c 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -32,8 +32,6 @@
  */
 
 #define ENTER	mov PER_CPU_VAR(current_task), %_ASM_BX
-#define EXIT	ASM_CLAC ;	\
-		ret
 
 .text
 ENTRY(__put_user_1)
@@ -43,7 +41,8 @@ ENTRY(__put_user_1)
 	ASM_STAC
 1:	movb %al,(%_ASM_CX)
 	xor %eax,%eax
-	EXIT
+	ASM_CLAC
+	ret
 ENDPROC(__put_user_1)
 EXPORT_SYMBOL(__put_user_1)
 
@@ -56,7 +55,8 @@ ENTRY(__put_user_2)
 	ASM_STAC
 2:	movw %ax,(%_ASM_CX)
 	xor %eax,%eax
-	EXIT
+	ASM_CLAC
+	ret
 ENDPROC(__put_user_2)
 EXPORT_SYMBOL(__put_user_2)
 
@@ -69,7 +69,8 @@ ENTRY(__put_user_4)
 	ASM_STAC
 3:	movl %eax,(%_ASM_CX)
 	xor %eax,%eax
-	EXIT
+	ASM_CLAC
+	ret
 ENDPROC(__put_user_4)
 EXPORT_SYMBOL(__put_user_4)
 
@@ -85,19 +86,21 @@ ENTRY(__put_user_8)
 5:	movl %edx,4(%_ASM_CX)
 #endif
 	xor %eax,%eax
-	EXIT
+	ASM_CLAC
+	RET
 ENDPROC(__put_user_8)
 EXPORT_SYMBOL(__put_user_8)
 
+bad_put_user_clac:
+	ASM_CLAC
 bad_put_user:
 	movl $-EFAULT,%eax
-	EXIT
-END(bad_put_user)
+	RET
 
-	_ASM_EXTABLE_UA(1b, bad_put_user)
-	_ASM_EXTABLE_UA(2b, bad_put_user)
-	_ASM_EXTABLE_UA(3b, bad_put_user)
-	_ASM_EXTABLE_UA(4b, bad_put_user)
+	_ASM_EXTABLE_UA(1b, bad_put_user_clac)
+	_ASM_EXTABLE_UA(2b, bad_put_user_clac)
+	_ASM_EXTABLE_UA(3b, bad_put_user_clac)
+	_ASM_EXTABLE_UA(4b, bad_put_user_clac)
 #ifdef CONFIG_X86_32
-	_ASM_EXTABLE_UA(5b, bad_put_user)
+	_ASM_EXTABLE_UA(5b, bad_put_user_clac)
 #endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index e0e006f1624e..fff28c6f73a2 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(clear_user);
  * but reuse __memcpy_mcsafe in case a new read error is encountered.
  * clac() is handled in _copy_to_iter_mcsafe().
  */
-__visible unsigned long
+__visible notrace unsigned long
 mcsafe_handle_tail(char *to, char *from, unsigned len)
 {
 	for (; len; --len, to++, from++) {
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index a5a41ec58072..0c122226ca56 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
 #define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
   ((y) + EXTENDED_Ebias) & 0x7fff; }
 #define exponent16(x)         (*(short *)&((x)->exp))
-#define setexponent16(x,y)  { (*(short *)&((x)->exp)) = (y); }
+#define setexponent16(x,y)  { (*(short *)&((x)->exp)) = (u16)(y); }
 #define addexponent(x,y)    { (*(short *)&((x)->exp)) += (y); }
 #define stdexp(x)           { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
 
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index 8dc9095bab22..742619e94bdf 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -18,7 +18,7 @@
 #include "control_w.h"
 
 #define MAKE_REG(s, e, l, h) { l, h, \
-		((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
+		(u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
 
 FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
 #if 0
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d1634c59ed56..6c46095cd0d9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1490,9 +1490,8 @@ good_area:
 NOKPROBE_SYMBOL(do_user_addr_fault);
 
 /*
- * This routine handles page faults.  It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
  */
 static noinline void
 __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1511,33 +1510,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 }
 NOKPROBE_SYMBOL(__do_page_fault);
 
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
-			 unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+			 unsigned long address)
 {
+	if (!trace_pagefault_enabled())
+		return;
+
 	if (user_mode(regs))
 		trace_page_fault_user(address, regs, error_code);
 	else
 		trace_page_fault_kernel(address, regs, error_code);
 }
 
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
 {
-	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	if (trace_pagefault_enabled())
-		trace_page_fault_entries(address, regs, error_code);
-
+	trace_page_fault_entries(regs, error_code, address);
 	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index e0df96fdfe46..fece30ca8b0c 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -15,6 +15,10 @@
 #include <linux/dma-direct.h>
 #include <linux/swiotlb.h>
 #include <linux/mem_encrypt.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
@@ -41,7 +45,7 @@ EXPORT_SYMBOL_GPL(sev_enable_key);
 bool sev_enabled __section(.data);
 
 /* Buffer used for early in-place encryption by BSP, no locking needed */
-static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
 
 /*
  * This routine does not change the underlying encryption setting of the
@@ -348,6 +352,32 @@ bool sev_active(void)
 }
 EXPORT_SYMBOL(sev_active);
 
+/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
+bool force_dma_unencrypted(struct device *dev)
+{
+	/*
+	 * For SEV, all DMA must be to unencrypted addresses.
+	 */
+	if (sev_active())
+		return true;
+
+	/*
+	 * For SME, all DMA must be to unencrypted addresses if the
+	 * device does not support DMA to addresses that include the
+	 * encryption mask.
+	 */
+	if (sme_active()) {
+		u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
+		u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
+						dev->bus_dma_mask);
+
+		if (dma_dev_mask <= dma_enc_mask)
+			return true;
+	}
+
+	return false;
+}
+
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_free_decrypted_mem(void)
 {
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bed6bb93c965..7ceb32821093 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void)
 			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
 		pv_ops.irq.irq_enable =
 			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
-		pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+		pv_ops.mmu.read_cr2 =
+			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
 	}
 }
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index f6e5eeecfc69..26e8b326966d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2)
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
 }
 
-static unsigned long xen_read_cr2(void)
-{
-	return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
-	return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
 static noinline void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
 }
 
 static const struct pv_mmu_ops xen_mmu_ops __initconst = {
-	.read_cr2 = xen_read_cr2,
+	.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
 	.write_cr2 = xen_write_cr2,
 
 	.read_cr3 = xen_read_cr3,
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..be104eef80be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
 #include <asm/percpu.h>
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
+#include <asm/asm.h>
 
 #include <linux/linkage.h>
 
@@ -135,3 +136,18 @@ ENTRY(check_events)
 	FRAME_END
 	ret
 ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+	_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+	FRAME_BEGIN
+	_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+	FRAME_END
+	ret
+	ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..45a441c33d6d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void);
 __visible unsigned long xen_save_fl_direct(void);
 __visible void xen_restore_fl_direct(unsigned long);
 
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
 /* These are not functions, and cannot be called normally */
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 72860325245a..586fcfe227ea 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3354,38 +3354,57 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
  * there is no active group, then the primary expectation for
  * this device is probably a high throughput.
  *
- * We are now left only with explaining the additional
- * compound condition that is checked below for deciding
- * whether the scenario is asymmetric. To explain this
- * compound condition, we need to add that the function
+ * We are now left only with explaining the two sub-conditions in the
+ * additional compound condition that is checked below for deciding
+ * whether the scenario is asymmetric. To explain the first
+ * sub-condition, we need to add that the function
  * bfq_asymmetric_scenario checks the weights of only
- * non-weight-raised queues, for efficiency reasons (see
- * comments on bfq_weights_tree_add()). Then the fact that
- * bfqq is weight-raised is checked explicitly here. More
- * precisely, the compound condition below takes into account
- * also the fact that, even if bfqq is being weight-raised,
- * the scenario is still symmetric if all queues with requests
- * waiting for completion happen to be
- * weight-raised. Actually, we should be even more precise
- * here, and differentiate between interactive weight raising
- * and soft real-time weight raising.
+ * non-weight-raised queues, for efficiency reasons (see comments on
+ * bfq_weights_tree_add()). Then the fact that bfqq is weight-raised
+ * is checked explicitly here. More precisely, the compound condition
+ * below takes into account also the fact that, even if bfqq is being
+ * weight-raised, the scenario is still symmetric if all queues with
+ * requests waiting for completion happen to be
+ * weight-raised. Actually, we should be even more precise here, and
+ * differentiate between interactive weight raising and soft real-time
+ * weight raising.
+ *
+ * The second sub-condition checked in the compound condition is
+ * whether there is a fair amount of already in-flight I/O not
+ * belonging to bfqq. If so, I/O dispatching is to be plugged, for the
+ * following reason. The drive may decide to serve in-flight
+ * non-bfqq's I/O requests before bfqq's ones, thereby delaying the
+ * arrival of new I/O requests for bfqq (recall that bfqq is sync). If
+ * I/O-dispatching is not plugged, then, while bfqq remains empty, a
+ * basically uncontrolled amount of I/O from other queues may be
+ * dispatched too, possibly causing the service of bfqq's I/O to be
+ * delayed even longer in the drive. This problem gets more and more
+ * serious as the speed and the queue depth of the drive grow,
+ * because, as these two quantities grow, the probability to find no
+ * queue busy but many requests in flight grows too. By contrast,
+ * plugging I/O dispatching minimizes the delay induced by already
+ * in-flight I/O, and enables bfqq to recover the bandwidth it may
+ * lose because of this delay.
  *
  * As a side note, it is worth considering that the above
- * device-idling countermeasures may however fail in the
- * following unlucky scenario: if idling is (correctly)
- * disabled in a time period during which all symmetry
- * sub-conditions hold, and hence the device is allowed to
- * enqueue many requests, but at some later point in time some
- * sub-condition stops to hold, then it may become impossible
- * to let requests be served in the desired order until all
- * the requests already queued in the device have been served.
+ * device-idling countermeasures may however fail in the following
+ * unlucky scenario: if I/O-dispatch plugging is (correctly) disabled
+ * in a time period during which all symmetry sub-conditions hold, and
+ * therefore the device is allowed to enqueue many requests, but at
+ * some later point in time some sub-condition stops to hold, then it
+ * may become impossible to make requests be served in the desired
+ * order until all the requests already queued in the device have been
+ * served. The last sub-condition commented above somewhat mitigates
+ * this problem for weight-raised queues.
  */
 static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
 						 struct bfq_queue *bfqq)
 {
 	return (bfqq->wr_coeff > 1 &&
-		bfqd->wr_busy_queues <
-		bfq_tot_busy_queues(bfqd)) ||
+		(bfqd->wr_busy_queues <
+		 bfq_tot_busy_queues(bfqd) ||
+		 bfqd->rq_in_driver >=
+		 bfqq->dispatched + 4)) ||
 		bfq_asymmetric_scenario(bfqd, bfqq);
 }
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 24ed26957367..55a7dc227dfb 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -54,7 +54,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
-static bool blkcg_debug_stats = false;
+bool blkcg_debug_stats = false;
 static struct workqueue_struct *blkcg_punt_bio_wq;
 
 static bool blkcg_policy_enabled(struct request_queue *q,
@@ -944,10 +944,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 					 dbytes, dios);
 		}
 
-		if (!blkcg_debug_stats)
-			goto next;
-
-		if (atomic_read(&blkg->use_delay)) {
+		if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
 			has_stats = true;
 			off += scnprintf(buf+off, size-off,
 					 " use_delay=%d delay_nsec=%llu",
@@ -967,7 +964,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 				has_stats = true;
 			off += written;
 		}
-next:
+
 		if (has_stats) {
 			if (off < size - 1) {
 				off += scnprintf(buf+off, size-off, "\n");
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index d973c38ee4fd..0fff7b56df0e 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -917,6 +917,9 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
 	unsigned long long avg_lat;
 	unsigned long long cur_win;
 
+	if (!blkcg_debug_stats)
+		return 0;
+
 	if (iolat->ssd)
 		return iolatency_ssd_stat(iolat, buf, size);
 
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index cf22ab00fefb..126021fc3a11 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -61,15 +61,6 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 		e->type->ops.completed_request(rq, now);
 }
 
-static inline void blk_mq_sched_started_request(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.started_request)
-		e->type->ops.started_request(rq);
-}
-
 static inline void blk_mq_sched_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..f78d3287dd82 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -669,8 +669,6 @@ void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
-	blk_mq_sched_started_request(rq);
-
 	trace_block_rq_issue(q, rq);
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
@@ -1960,9 +1958,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	rq = blk_mq_get_request(q, bio, &data);
 	if (unlikely(!rq)) {
 		rq_qos_cleanup(q, bio);
-		if (bio->bi_opf & REQ_NOWAIT)
+
+		cookie = BLK_QC_T_NONE;
+		if (bio->bi_opf & REQ_NOWAIT_INLINE)
+			cookie = BLK_QC_T_EAGAIN;
+		else if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
-		return BLK_QC_T_NONE;
+		return cookie;
 	}
 
 	trace_block_getrq(q, bio, bio->bi_opf);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 659ccb8b693f..3954c0dc1443 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -202,6 +202,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
 		return -1;
 
 	data->got_token = true;
+	smp_wmb();
 	list_del_init(&curr->entry);
 	wake_up_process(data->task);
 	return 1;
@@ -244,7 +245,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 		return;
 
 	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
+	has_sleeper = !wq_has_single_sleeper(&rqw->wait);
 	do {
+		/* The memory barrier in set_task_state saves us here. */
 		if (data.got_token)
 			break;
 		if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
@@ -255,12 +258,14 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
 			 * which means we now have two. Put our local token
 			 * and wake anyone else potentially waiting for one.
 			 */
+			smp_rmb();
 			if (data.got_token)
 				cleanup_cb(rqw, private_data);
 			break;
 		}
 		io_schedule();
-		has_sleeper = false;
+		has_sleeper = true;
+		set_current_state(TASK_UNINTERRUPTIBLE);
 	} while (1);
 	finish_wait(&rqw->wait, &data.wq);
 }
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 2ae348c101a0..2c1831207a8f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -752,7 +752,8 @@ void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
 	 * page (which might not be idential to the Linux PAGE_SIZE).  Because
 	 * of that they are not limited by our notion of "segment size".
 	 */
-	q->limits.max_segment_size = UINT_MAX;
+	if (mask)
+		q->limits.max_segment_size = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_queue_virt_boundary);
 
diff --git a/block/genhd.c b/block/genhd.c
index 97887e59f3b2..54f1f0d381f4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1969,7 +1969,7 @@ static const struct attribute *disk_events_attrs[] = {
  * The default polling interval can be specified by the kernel
  * parameter block.events_dfl_poll_msecs which defaults to 0
  * (disable).  This can also be modified runtime by writing to
- * /sys/module/block/events_dfl_poll_msecs.
+ * /sys/module/block/parameters/events_dfl_poll_msecs.
  */
 static int disk_events_set_dfl_poll_msecs(const char *val,
 					  const struct kernel_param *kp)
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index d4551e33fa71..8569b79e8b58 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -611,8 +611,8 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id,
 
 	/* Move to ITS specific data */
 	its = (struct acpi_iort_its_group *)node->node_data;
-	if (idx > its->its_count) {
-		dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n",
+	if (idx >= its->its_count) {
+		dev_err(dev, "requested ITS ID index [%d] overruns ITS entries [%d]\n",
 			idx, its->its_count);
 		return -ENXIO;
 	}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index c02fa27dd3f3..1413324982f0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1282,7 +1282,7 @@ static ssize_t hw_error_scrub_store(struct device *dev,
 	if (rc)
 		return rc;
 
-	device_lock(dev);
+	nfit_device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
@@ -1299,7 +1299,7 @@ static ssize_t hw_error_scrub_store(struct device *dev,
 			break;
 		}
 	}
-	device_unlock(dev);
+	nfit_device_unlock(dev);
 	if (rc)
 		return rc;
 	return size;
@@ -1319,9 +1319,9 @@ static ssize_t scrub_show(struct device *dev,
 	ssize_t rc = -ENXIO;
 	bool busy;
 
-	device_lock(dev);
+	nfit_device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (!nd_desc) {
-		device_unlock(dev);
+		nfit_device_unlock(dev);
 		return rc;
 	}
@@ -1339,7 +1339,7 @@ static ssize_t scrub_show(struct device *dev,
 	}
 
 	mutex_unlock(&acpi_desc->init_mutex);
-	device_unlock(dev);
+	nfit_device_unlock(dev);
 	return rc;
 }
 
@@ -1356,14 +1356,14 @@ static ssize_t scrub_store(struct device *dev,
 	if (val != 1)
 		return -EINVAL;
 
-	device_lock(dev);
+	nfit_device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
 		rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
 	}
-	device_unlock(dev);
+	nfit_device_unlock(dev);
 	if (rc)
 		return rc;
 	return size;
@@ -1749,9 +1749,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
 	struct acpi_device *adev = data;
 	struct device *dev = &adev->dev;
 
-	device_lock(dev->parent);
+	nfit_device_lock(dev->parent);
 	__acpi_nvdimm_notify(dev, event);
-	device_unlock(dev->parent);
+	nfit_device_unlock(dev->parent);
 }
 
 static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
@@ -3457,8 +3457,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	struct device *dev = acpi_desc->dev;
 
 	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
-	device_lock(dev);
-	device_unlock(dev);
+	nfit_device_lock(dev);
+	nfit_device_unlock(dev);
 
 	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
@@ -3602,8 +3602,8 @@ void acpi_nfit_shutdown(void *data)
 	 * acpi_nfit_ars_rescan() submissions have had a chance to
 	 * either submit or see ->cancel set.
 	 */
-	device_lock(bus_dev);
-	device_unlock(bus_dev);
+	nfit_device_lock(bus_dev);
+	nfit_device_unlock(bus_dev);
 
 	flush_workqueue(nfit_wq);
 }
@@ -3746,9 +3746,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
 
 static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
 {
-	device_lock(&adev->dev);
+	nfit_device_lock(&adev->dev);
 	__acpi_nfit_notify(&adev->dev, adev->handle, event);
-	device_unlock(&adev->dev);
+	nfit_device_unlock(&adev->dev);
 }
 
 static const struct acpi_device_id acpi_nfit_ids[] = {
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 6ee2b02af73e..24241941181c 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -312,6 +312,30 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
 }
 
+#ifdef CONFIG_PROVE_LOCKING
+static inline void nfit_device_lock(struct device *dev)
+{
+	device_lock(dev);
+	mutex_lock(&dev->lockdep_mutex);
+}
+
+static inline void nfit_device_unlock(struct device *dev)
+{
+	mutex_unlock(&dev->lockdep_mutex);
+	device_unlock(dev);
+}
+#else
+static inline void nfit_device_lock(struct device *dev)
+{
+	device_lock(dev);
+}
+
+static inline void nfit_device_unlock(struct device *dev)
+{
+	device_unlock(dev);
+}
+#endif
+
 const guid_t *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_shutdown(void *data);
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 72312ad2e142..3a36e76eca83 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -408,7 +408,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
 	hpriv->mmio = devm_ioremap_resource(dev,
 			      platform_get_resource(pdev, IORESOURCE_MEM, 0));
 	if (IS_ERR(hpriv->mmio)) {
-		dev_err(dev, "no mmio space\n");
 		rc = PTR_ERR(hpriv->mmio);
 		goto err_out;
 	}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index da84a73f2ba6..636058bbf48a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1663,6 +1663,9 @@ void device_initialize(struct device *dev)
 	kobject_init(&dev->kobj, &device_ktype);
 	INIT_LIST_HEAD(&dev->dma_pools);
 	mutex_init(&dev->mutex);
+#ifdef CONFIG_PROVE_LOCKING
+	mutex_init(&dev->lockdep_mutex);
+#endif
 	lockdep_set_novalidate_class(&dev->mutex);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
@@ -2211,6 +2214,24 @@ void put_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(put_device);
 
+bool kill_device(struct device *dev)
+{
+	/*
+	 * Require the device lock and set the "dead" flag to guarantee that
+	 * the update behavior is consistent with the other bitfields near
+	 * it and that we cannot have an asynchronous probe routine trying
+	 * to run while we are tearing out the bus/class/sysfs from
+	 * underneath the device.
+	 */
+	lockdep_assert_held(&dev->mutex);
+
+	if (dev->p->dead)
+		return false;
+	dev->p->dead = true;
+	return true;
+}
+EXPORT_SYMBOL_GPL(kill_device);
+
 /**
  * device_del - delete device from system.
  * @dev: device.
@@ -2230,15 +2251,8 @@ void device_del(struct device *dev)
 	struct kobject *glue_dir = NULL;
 	struct class_interface *class_intf;
 
-	/*
-	 * Hold the device lock and set the "dead" flag to guarantee that
-	 * the update behavior is consistent with the other bitfields near
-	 * it and that we cannot have an asynchronous probe routine trying
-	 * to run while we are tearing out the bus/class/sysfs from
-	 * underneath the device.
-	 */
 	device_lock(dev);
-	dev->p->dead = true;
+	kill_device(dev);
 	device_unlock(dev);
 
 	/* Notify clients of device removal.  This call must come
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 90ebfcae0ce6..2b3103c30857 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -5417,7 +5417,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
 	unsigned int key_len;
 	char secret[SHARED_SECRET_MAX]; /* 64 byte */
 	unsigned int resp_size;
-	SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
+	struct shash_desc *desc;
 	struct packet_info pi;
 	struct net_conf *nc;
 	int err, rv;
@@ -5430,6 +5430,13 @@ static int drbd_do_auth(struct drbd_connection *connection)
 	memcpy(secret, nc->shared_secret, key_len);
 	rcu_read_unlock();
 
+	desc = kmalloc(sizeof(struct shash_desc) +
+		       crypto_shash_descsize(connection->cram_hmac_tfm),
+		       GFP_KERNEL);
+	if (!desc) {
+		rv = -1;
+		goto fail;
+	}
 	desc->tfm = connection->cram_hmac_tfm;
 
 	rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
@@ -5571,7 +5578,10 @@ static int drbd_do_auth(struct drbd_connection *connection)
 	kfree(peers_ch);
 	kfree(response);
 	kfree(right_response);
-	shash_desc_zero(desc);
+	if (desc) {
+		shash_desc_zero(desc);
+		kfree(desc);
+	}
 
 	return rv;
 }
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 23553ed6b548..2d22d6bf52f2 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -248,16 +248,12 @@ static int __maybe_unused cn_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct cn_dev cdev = {
-	.input   = cn_rx_skb,
-};
-
 static int cn_init(void)
 {
 	struct cn_dev *dev = &cdev;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= CN_NETLINK_USERS + 0xf,
-		.input	= dev->input,
+		.input	= cn_rx_skb,
 	};
 
 	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg);
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 93f39a1d4c3d..c66f566a854c 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -131,10 +131,18 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	int err = -ENODEV;
 
 	cpu = of_get_cpu_node(policy->cpu, NULL);
+	if (!cpu)
+		goto out;
 
+	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
 	of_node_put(cpu);
-	if (!cpu)
+	if (!max_freqp) {
+		err = -EINVAL;
 		goto out;
+	}
+
+	/* we need the freq in kHz */
+	max_freq = *max_freqp / 1000;
 
 	dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
 	if (!dn)
@@ -171,16 +179,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 
 	pr_debug("init cpufreq on CPU %d\n", policy->cpu);
-
-	max_freqp = of_get_property(cpu, "clock-frequency", NULL);
-	if (!max_freqp) {
-		err = -EINVAL;
-		goto out_unmap_sdcpwr;
-	}
-
-	/* we need the freq in kHz */
-	max_freq = *max_freqp / 1000;
-
 	pr_debug("max clock-frequency is at %u kHz\n", max_freq);
 	pr_debug("initializing frequency table\n");
 
@@ -199,9 +197,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency());
 	return 0;
 
-out_unmap_sdcpwr:
-	iounmap(sdcpwr_mapbase);
-
 out_unmap_sdcasr:
 	iounmap(sdcasr_mapbase);
 out:
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 53446e39a32c..ba8d3d0ef32c 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -157,7 +157,7 @@ config DMI_SCAN_MACHINE_NON_EFI_FALLBACK
 
 config ISCSI_IBFT_FIND
 	bool "iSCSI Boot Firmware Table Attributes"
-	depends on X86 && ACPI
+	depends on X86 && ISCSI_IBFT
 	default n
 	help
 	  This option enables the kernel to find the region of memory
@@ -168,7 +168,8 @@ config ISCSI_IBFT_FIND
 config ISCSI_IBFT
 	tristate "iSCSI Boot Firmware Table Attributes module"
 	select ISCSI_BOOT_SYSFS
-	depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL
+	select ISCSI_IBFT_FIND if X86
+	depends on ACPI && SCSI && SCSI_LOWLEVEL
 	default	n
 	help
 	  This option enables support for detection and exposing of iSCSI
diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c
index ab3aa3983833..7e12cbdf957c 100644
--- a/drivers/firmware/iscsi_ibft.c
+++ b/drivers/firmware/iscsi_ibft.c
@@ -84,6 +84,10 @@ MODULE_DESCRIPTION("sysfs interface to BIOS iBFT information");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(IBFT_ISCSI_VERSION);
 
+#ifndef CONFIG_ISCSI_IBFT_FIND
+struct acpi_table_ibft *ibft_addr;
+#endif
+
 struct ibft_hdr {
 	u8 id;
 	u8 version;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f2e8b4238efd..5376328d3fd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -148,7 +148,7 @@ struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
 };
 int amdgpu_ras_enable = -1;
-uint amdgpu_ras_mask = 0xffffffff;
+uint amdgpu_ras_mask = 0xfffffffb;
 
 /**
  * DOC: vramlimit (int)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 8b7efd0a7028..03ca8c69114f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1734,7 +1734,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
 		return -EINVAL;
 
 	if (is_support_sw_smu(adev)) {
-		err = smu_get_current_rpm(&adev->smu, &speed);
+		err = smu_get_fan_speed_rpm(&adev->smu, &speed);
 		if (err)
 			return err;
 	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
@@ -1794,7 +1794,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
 		return -EINVAL;
 
 	if (is_support_sw_smu(adev)) {
-		err = smu_get_current_rpm(&adev->smu, &rpm);
+		err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
 		if (err)
 			return err;
 	} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1a4412e47810..fac7aa2c244f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -136,11 +136,6 @@ static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
 static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
 		struct amdgpu_bo **bo_ptr);
 
-static void amdgpu_ras_self_test(struct amdgpu_device *adev)
-{
-	/* TODO */
-}
-
 static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -689,6 +684,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	if (!obj)
 		return -EINVAL;
 
+	if (block_info.block_id != TA_RAS_BLOCK__UMC) {
+		DRM_INFO("%s error injection is not supported yet\n",
+			 ras_block_str(info->head.block));
+		return -EINVAL;
+	}
+
 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
 	if (ret)
 		DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
@@ -1557,6 +1558,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 
 	amdgpu_ras_check_supported(adev, &con->hw_supported,
 			&con->supported);
+	if (!con->hw_supported) {
+		amdgpu_ras_set_context(adev, NULL);
+		kfree(con);
+		return 0;
+	}
+
 	con->features = 0;
 	INIT_LIST_HEAD(&con->head);
 	/* Might need get this flag from vbios. */
@@ -1570,8 +1577,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	if (amdgpu_ras_fs_init(adev))
 		goto fs_out;
 
-	amdgpu_ras_self_test(adev);
-
 	DRM_INFO("RAS INFO: ras initialized successfully, "
 			"hardware ability[%x] ras_mask[%x]\n",
 			con->hw_supported, con->supported);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1675d5837c3c..3df50c9c2fb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1441,6 +1441,15 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
 	}
 	nv_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+
+	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
+	   access. These should be enabled by FW for target VMIDs. */
+	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+	}
 }
 
 static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 0db9f488da7e..21187275dfd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1879,6 +1879,15 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
 	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+
+	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
+	   access. These should be enabled by FW for target VMIDs. */
+	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+	}
 }
 
 static void gfx_v7_0_config_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5f401b41ef7c..751567f78567 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3706,6 +3706,15 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
 	}
 	vi_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+
+	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
+	   access. These should be enabled by FW for target VMIDs. */
+	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+	}
 }
 
 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index f4c4eea62526..1cf639a51178 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1918,6 +1918,15 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+
+	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
+	   access. These should be enabled by FW for target VMIDs. */
+	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+	}
 }
 
 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 988c0adaca91..1cfc2620b2dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -372,11 +372,8 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev)
 		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
 			upper_32_bits(adev->vcn.gpu_addr));
 		offset = size;
-		/* No signed header for now from firmware
 		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
-		*/
-		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
 	}
 
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
index c1a92c16535c..5cc3acccda2a 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
@@ -262,12 +262,12 @@ void dce110_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_internal *clk_mgr)
 {
+	dce_clk_mgr_construct(ctx, clk_mgr);
+
 	memcpy(clk_mgr->max_clks_by_state,
 		dce110_max_clks_by_state,
 		sizeof(dce110_max_clks_by_state));
 
-	dce_clk_mgr_construct(ctx, clk_mgr);
-
 	clk_mgr->regs = &disp_clk_regs;
 	clk_mgr->clk_mgr_shift = &disp_clk_shift;
 	clk_mgr->clk_mgr_mask = &disp_clk_mask;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
index 778392c73187..7c746ef1e32e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
@@ -226,12 +226,12 @@ void dce112_clk_mgr_construct(
 		struct dc_context *ctx,
 		struct clk_mgr_internal *clk_mgr)
 {
+	dce_clk_mgr_construct(ctx, clk_mgr);
+
 	memcpy(clk_mgr->max_clks_by_state,
 		dce112_max_clks_by_state,
 		sizeof(dce112_max_clks_by_state));
 
-	dce_clk_mgr_construct(ctx, clk_mgr);
-
 	clk_mgr->regs = &disp_clk_regs;
 	clk_mgr->clk_mgr_shift = &disp_clk_shift;
 	clk_mgr->clk_mgr_mask = &disp_clk_mask;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
index 906310c3e2eb..5399b8cf6b75 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
@@ -127,12 +127,12 @@ static struct clk_mgr_funcs dce120_funcs = {
 
 void dce120_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr)
 {
+	dce_clk_mgr_construct(ctx, clk_mgr);
+
 	memcpy(clk_mgr->max_clks_by_state,
 		dce120_max_clks_by_state,
 		sizeof(dce120_max_clks_by_state));
 
-	dce_clk_mgr_construct(ctx, clk_mgr);
-
 	clk_mgr->base.dprefclk_khz = 600000;
 	clk_mgr->base.funcs = &dce120_funcs;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
index 08a774fc7b67..50bfb5921de0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
@@ -301,6 +301,8 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr,
 void dcn2_init_clocks(struct clk_mgr *clk_mgr)
 {
 	memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+	// Assumption is that boot state always supports pstate
+	clk_mgr->clks.p_state_change_support = true;
 }
 
 void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base)
@@ -331,6 +333,7 @@ void dcn20_clk_mgr_construct(
 		struct dccg *dccg)
 {
 	clk_mgr->base.ctx = ctx;
+	clk_mgr->pp_smu = pp_smu;
 	clk_mgr->base.funcs = &dcn2_funcs;
 	clk_mgr->regs = &clk_mgr_regs;
 	clk_mgr->clk_mgr_shift = &clk_mgr_shift;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 4ef4dc63e221..fa20201eef3a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -502,8 +502,10 @@ void dc_stream_set_static_screen_events(struct dc *dc,
 
 static void destruct(struct dc *dc)
 {
-	dc_release_state(dc->current_state);
-	dc->current_state = NULL;
+	if (dc->current_state) {
+		dc_release_state(dc->current_state);
+		dc->current_state = NULL;
+	}
 
 	destroy_links(dc);
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 8dbf759eba45..355b4ba12796 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -532,6 +532,7 @@ static void read_edp_current_link_settings_on_detect(struct dc_link *link)
 	uint32_t read_dpcd_retry_cnt = 10;
 	enum dc_status status = DC_ERROR_UNEXPECTED;
 	int i;
+	union max_down_spread max_down_spread = { {0} };
 
 	// Read DPCD 00101h to find out the number of lanes currently set
 	for (i = 0; i < read_dpcd_retry_cnt; i++) {
@@ -553,8 +554,6 @@ static void read_edp_current_link_settings_on_detect(struct dc_link *link)
 		msleep(8);
 	}
 
-	ASSERT(status == DC_OK);
-
 	// Read DPCD 00100h to find if standard link rates are set
 	core_link_read_dpcd(link, DP_LINK_BW_SET,
 			&link_bw_set, sizeof(link_bw_set));
@@ -576,6 +575,12 @@ static void read_edp_current_link_settings_on_detect(struct dc_link *link)
 		link->cur_link_settings.link_rate = link_bw_set;
 		link->cur_link_settings.use_link_rate_set = false;
 	}
+	// Read DPCD 00003h to find the max down spread.
+	core_link_read_dpcd(link, DP_MAX_DOWNSPREAD,
+			&max_down_spread.raw, sizeof(max_down_spread));
+	link->cur_link_settings.link_spread =
+		max_down_spread.bits.MAX_DOWN_SPREAD ?
+		LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
 }
 
 static bool detect_dp(
@@ -717,13 +722,6 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 		return false;
 	}
 
-	if (link->connector_signal == SIGNAL_TYPE_EDP) {
-		/* On detect, we want to make sure current link settings are
-		 * up to date, especially if link was powered on by GOP.
-		 */
-		read_edp_current_link_settings_on_detect(link);
-	}
-
 	prev_sink = link->local_sink;
 	if (prev_sink != NULL) {
 		dc_sink_retain(prev_sink);
@@ -765,6 +763,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 		}
 
 		case SIGNAL_TYPE_EDP: {
+			read_edp_current_link_settings_on_detect(link);
 			detect_edp_sink_caps(link);
 			sink_caps.transaction_type =
 				DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
@@ -2329,7 +2328,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
 			if (core_dc->current_state->res_ctx.pipe_ctx[i].stream) {
 				if (core_dc->current_state->res_ctx.
 						pipe_ctx[i].stream->link
-						== link)
+						== link) {
 					/* DMCU -1 for all controller id values,
 					 * therefore +1 here
 					 */
@@ -2337,6 +2336,13 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
 						core_dc->current_state->
 						res_ctx.pipe_ctx[i].stream_res.tg->inst +
 						1;
+
+					/* Disable brightness ramping when the display is blanked
+					 * as it can hang the DMCU
+					 */
+					if (core_dc->current_state->res_ctx.pipe_ctx[i].plane_state == NULL)
+						frame_ramp = 0;
+				}
 			}
 		}
 		abm->funcs->set_backlight_level_pwm(
@@ -2984,8 +2990,10 @@ void dc_link_set_preferred_link_settings(struct dc *dc,
 
 	/* Retrain with preferred link settings only relevant for
 	 * DP signal type
+	 * Check for non-DP signal or if passive dongle present
 	 */
-	if (!dc_is_dp_signal(link->connector_signal))
+	if (!dc_is_dp_signal(link->connector_signal) ||
+		link->dongle_max_pix_clk > 0)
 		return;
 
 	for (i = 0; i < MAX_PIPES; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 056be4c34a98..2c7aaed907b9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2230,18 +2230,25 @@ static void get_active_converter_info(
 		link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
 		ddc_service_set_dongle_type(link->ddc,
 				link->dpcd_caps.dongle_type);
+		link->dpcd_caps.is_branch_dev = false;
 		return;
 	}
 
 	/* DPCD 0x5 bit 0 = 1, it indicate it's branch device */
-	link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
+	if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) {
+		link->dpcd_caps.is_branch_dev = false;
+	}
+
+	else {
+		link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
+	}
 
 	switch (ds_port.fields.PORT_TYPE) {
 	case DOWNSTREAM_VGA:
 		link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_VGA_CONVERTER;
 		break;
-	case DOWNSTREAM_DVI_HDMI:
-		/* At this point we don't know is it DVI or HDMI,
+	case DOWNSTREAM_DVI_HDMI_DP_PLUS_PLUS:
+		/* At this point we don't know whether it is DVI, HDMI or DP++,
 		 * assume DVI.*/
 		link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_DVI_CONVERTER;
 		break;
@@ -2258,6 +2265,10 @@ static void get_active_converter_info(
 				det_caps, sizeof(det_caps));
 
 		switch (port_caps->bits.DWN_STRM_PORTX_TYPE) {
+		/*Handle DP case as DONGLE_NONE*/
+		case DOWN_STREAM_DETAILED_DP:
+			link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
+			break;
 		case DOWN_STREAM_DETAILED_VGA:
 			link->dpcd_caps.dongle_type =
 				DISPLAY_DONGLE_DP_VGA_CONVERTER;
@@ -2267,6 +2278,8 @@ static void get_active_converter_info(
 				DISPLAY_DONGLE_DP_DVI_CONVERTER;
 			break;
 		case DOWN_STREAM_DETAILED_HDMI:
+		case DOWN_STREAM_DETAILED_DP_PLUS_PLUS:
+			/*Handle DP++ active converter case, process DP++ case as HDMI case according to the DP1.4 spec*/
 			link->dpcd_caps.dongle_type =
 				DISPLAY_DONGLE_DP_HDMI_CONVERTER;
 
@@ -2282,14 +2295,18 @@ static void get_active_converter_info(
 
 				link->dpcd_caps.dongle_caps.is_dp_hdmi_s3d_converter =
 					hdmi_caps.bits.FRAME_SEQ_TO_FRAME_PACK;
-				link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through =
-					hdmi_caps.bits.YCrCr422_PASS_THROUGH;
-				link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through =
-					hdmi_caps.bits.YCrCr420_PASS_THROUGH;
-				link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter =
-					hdmi_caps.bits.YCrCr422_CONVERSION;
-				link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter =
-					hdmi_caps.bits.YCrCr420_CONVERSION;
+				/*YCBCR capability only for HDMI case*/
+				if (port_caps->bits.DWN_STRM_PORTX_TYPE
+						== DOWN_STREAM_DETAILED_HDMI) {
+					link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through =
+							hdmi_caps.bits.YCrCr422_PASS_THROUGH;
+					link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through =
+							hdmi_caps.bits.YCrCr420_PASS_THROUGH;
+					link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter =
+							hdmi_caps.bits.YCrCr422_CONVERSION;
+					link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter =
+							hdmi_caps.bits.YCrCr420_CONVERSION;
+				}
 
 				link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc =
 					translate_dpcd_max_bpc(
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 51a78283a86d..2ceaab4fb5de 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -258,7 +258,7 @@ bool resource_construct(
 		 * PORT_CONNECTIVITY == 1 (as instructed by HW team).
 		 */
 		update_num_audio(&straps, &num_audio, &pool->audio_support);
-		for (i = 0; i < pool->pipe_count && i < num_audio; i++) {
+		for (i = 0; i < caps->num_audio; i++) {
 			struct audio *aud = create_funcs->create_audio(ctx, i);
 
 			if (aud == NULL) {
@@ -1669,6 +1669,12 @@ static struct audio *find_first_free_audio(
 			return pool->audios[i];
 		}
 	}
+
+    /* use engine id to find free audio */
+	if ((id < pool->audio_count) && (res_ctx->is_audio_acquired[id] == false)) {
+		return pool->audios[id];
+	}
+
 	/*not found the matching one, first come first serve*/
 	for (i = 0; i < pool->audio_count; i++) {
 		if (res_ctx->is_audio_acquired[i] == false) {
@@ -1833,6 +1839,7 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing)
 		pix_clk /= 2;
 	if (timing->pixel_encoding != PIXEL_ENCODING_YCBCR422) {
 		switch (timing->display_color_depth) {
+		case COLOR_DEPTH_666:
 		case COLOR_DEPTH_888:
 			normalized_pix_clk = pix_clk;
 			break;
@@ -1979,7 +1986,7 @@ enum dc_status resource_map_pool_resources(
 	/* TODO: Add check if ASIC support and EDID audio */
 	if (!stream->converter_disable_audio &&
 	    dc_is_audio_capable_signal(pipe_ctx->stream->signal) &&
-	    stream->audio_info.mode_count) {
+	    stream->audio_info.mode_count && stream->audio_info.flags.all) {
 		pipe_ctx->stream_res.audio = find_first_free_audio(
 		&context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id);
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index af7f8be230f7..352862370390 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -612,7 +612,8 @@ bool dc_stream_set_dynamic_metadata(struct dc *dc,
 
 	pipe_ctx->stream->dmdata_address = attr->address;
 
-	if (pipe_ctx->stream_res.stream_enc->funcs->set_dynamic_metadata != NULL) {
+	if (pipe_ctx->stream_res.stream_enc &&
+			pipe_ctx->stream_res.stream_enc->funcs->set_dynamic_metadata != NULL) {
 		if (pipe_ctx->stream->dmdata_address.quad_part != 0) {
 			/* if using dynamic meta, don't set up generic infopackets */
 			pipe_ctx->stream_res.encoder_info_frame.hdrsmd.valid = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index f8903bcabe49..58bd131d5b48 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -239,6 +239,10 @@ static void dmcu_set_backlight_level(
 	s2 |= (backlight_8_bit << ATOM_S2_CURRENT_BL_LEVEL_SHIFT);
 
 	REG_WRITE(BIOS_SCRATCH_2, s2);
+
+	/* waitDMCUReadyForCmd */
+	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT,
+			0, 1, 80000);
 }
 
 static void dce_abm_init(struct abm *abm)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 858a58856ebd..fafb4b470140 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -965,11 +965,17 @@ void hwss_edp_backlight_control(
 void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 {
 	/* notify audio driver for audio modes of monitor */
-	struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+	struct dc *core_dc;
 	struct pp_smu_funcs *pp_smu = NULL;
-	struct clk_mgr *clk_mgr = core_dc->clk_mgr;
+	struct clk_mgr *clk_mgr;
 	unsigned int i, num_audio = 1;
 
+	if (!pipe_ctx->stream)
+		return;
+
+	core_dc = pipe_ctx->stream->ctx->dc;
+	clk_mgr = core_dc->clk_mgr;
+
 	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true)
 		return;
 
@@ -999,9 +1005,15 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 
 void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 {
-	struct dc *dc = pipe_ctx->stream->ctx->dc;
+	struct dc *dc;
 	struct pp_smu_funcs *pp_smu = NULL;
-	struct clk_mgr *clk_mgr = dc->clk_mgr;
+	struct clk_mgr *clk_mgr;
+
+	if (!pipe_ctx || !pipe_ctx->stream)
+		return;
+
+	dc = pipe_ctx->stream->ctx->dc;
+	clk_mgr = dc->clk_mgr;
 
 	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == false)
 		return;
@@ -1009,6 +1021,8 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 	pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
 			pipe_ctx->stream_res.stream_enc, true);
 	if (pipe_ctx->stream_res.audio) {
+		pipe_ctx->stream_res.audio->enabled = false;
+
 		if (dc->res_pool->pp_smu)
 			pp_smu = dc->res_pool->pp_smu;
 
@@ -1039,8 +1053,6 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 		/* dal_audio_disable_azalia_audio_jack_presence(stream->audio,
 		 * stream->stream_engine_id);
 		 */
-		if (pipe_ctx->stream_res.audio)
-			pipe_ctx->stream_res.audio->enabled = false;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index e50a696fcb5d..2118ea21d7e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1195,16 +1195,7 @@ static void dcn10_init_hw(struct dc *dc)
 	 * everything down.
 	 */
 	if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct hubp *hubp = dc->res_pool->hubps[i];
-			struct dpp *dpp = dc->res_pool->dpps[i];
-
-			hubp->funcs->hubp_init(hubp);
-			dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
-			plane_atomic_power_down(dc, dpp, hubp);
-		}
-
-		apply_DEGVIDCN10_253_wa(dc);
+		dc->hwss.init_pipes(dc, dc->current_state);
 	}
 
 	for (i = 0; i < dc->res_pool->audio_count; i++) {
@@ -1375,10 +1366,6 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx,
 	return result;
 }
 
-
-
-
-
 static bool
 dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
 			       const struct dc_stream_state *stream)
@@ -2516,6 +2503,12 @@ static void dcn10_apply_ctx_for_surface(
 		if (removed_pipe[i])
 			dcn10_disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
 
+	for (i = 0; i < dc->res_pool->pipe_count; i++)
+		if (removed_pipe[i]) {
+			dc->hwss.optimize_bandwidth(dc, context);
+			break;
+		}
+
 	if (dc->hwseq->wa.DEGVIDCN10_254)
 		hubbub1_wm_change_req_wa(dc->res_pool->hubbub);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 1a20461c2937..a12530a3ab9c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -508,7 +508,7 @@ static const struct resource_caps rv2_res_cap = {
 		.num_audio = 3,
 		.num_stream_encoder = 3,
 		.num_pll = 3,
-		.num_ddc = 3,
+		.num_ddc = 4,
 };
 
 static const struct dc_plane_cap plane_cap = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
index ece6e136437b..6e2dbd03f9bf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
@@ -337,6 +337,7 @@ static enum dcn_hubbub_page_table_block_size page_table_block_size_to_hw(unsigne
 		break;
 	default:
 		ASSERT(false);
+		block_size = page_table_block_size;
 		break;
 	}
 
@@ -366,25 +367,24 @@ int hubbub2_init_dchub_sys_ctx(struct hubbub *hubbub,
 	struct dcn_vmid_page_table_config phys_config;
 
 	REG_SET(DCN_VM_FB_LOCATION_BASE, 0,
-			FB_BASE, pa_config->system_aperture.fb_base);
+			FB_BASE, pa_config->system_aperture.fb_base >> 24);
 	REG_SET(DCN_VM_FB_LOCATION_TOP, 0,
-			FB_TOP, pa_config->system_aperture.fb_top);
+			FB_TOP, pa_config->system_aperture.fb_top >> 24);
 	REG_SET(DCN_VM_FB_OFFSET, 0,
-			FB_OFFSET, pa_config->system_aperture.fb_offset);
+			FB_OFFSET, pa_config->system_aperture.fb_offset >> 24);
 	REG_SET(DCN_VM_AGP_BOT, 0,
-			AGP_BOT, pa_config->system_aperture.agp_bot);
+			AGP_BOT, pa_config->system_aperture.agp_bot >> 24);
 	REG_SET(DCN_VM_AGP_TOP, 0,
-			AGP_TOP, pa_config->system_aperture.agp_top);
+			AGP_TOP, pa_config->system_aperture.agp_top >> 24);
 	REG_SET(DCN_VM_AGP_BASE, 0,
-			AGP_BASE, pa_config->system_aperture.agp_base);
+			AGP_BASE, pa_config->system_aperture.agp_base >> 24);
 
 	if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) {
-		phys_config.depth = 1;
-		phys_config.block_size = 4096;
 		phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12;
 		phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12;
 		phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
-
+		phys_config.depth = 0;
+		phys_config.block_size = 0;
 		// Init VMID 0 based on PA config
 		dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config);
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index 0b84a322b8a2..d810c8940129 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1153,8 +1153,8 @@ void dcn20_enable_plane(
 
 		apt.sys_default.quad_part = 0;
 
-		apt.sys_high.quad_part = dc->vm_pa_config.system_aperture.start_addr;
-		apt.sys_low.quad_part = dc->vm_pa_config.system_aperture.end_addr;
+		apt.sys_low.quad_part = dc->vm_pa_config.system_aperture.start_addr;
+		apt.sys_high.quad_part = dc->vm_pa_config.system_aperture.end_addr;
 
 		// Program system aperture settings
 		pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt);
@@ -1242,6 +1242,8 @@ void dcn20_pipe_control_lock_global(
 				CRTC_STATE_VACTIVE);
 		pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg,
 				CRTC_STATE_VBLANK);
+		pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg,
+				CRTC_STATE_VACTIVE);
 		pipe->stream_res.tg->funcs->lock_doublebuffer_disable(
 				pipe->stream_res.tg);
 	}
@@ -1263,6 +1265,17 @@ void dcn20_pipe_control_lock(
 	if (pipe->plane_state != NULL)
 		flip_immediate = pipe->plane_state->flip_immediate;
 
+	if (flip_immediate && lock) {
+		while (pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp))	{
+			udelay(1);
+		}
+
+		if (pipe->bottom_pipe != NULL)
+			while (pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp))	{
+				udelay(1);
+			}
+	}
+
 	/* In flip immediate and pipe splitting case, we need to use GSL
 	 * for synchronization. Only do setup on locking and on flip type change.
 	 */
@@ -1740,8 +1753,11 @@ static void dcn20_reset_back_end_for_pipe(
 		else if (pipe_ctx->stream_res.audio) {
 			dc->hwss.disable_audio_stream(pipe_ctx, FREE_ACQUIRED_RESOURCE);
 		}
-
 	}
+#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+	else if (pipe_ctx->stream_res.dsc)
+		dp_set_dsc_enable(pipe_ctx, false);
+#endif
 
 	/* by upper caller loop, parent pipe: pipe0, will be reset last.
 	 * back end share by all pipes and will be disable only when disable
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
index 26a66ccf6e72..1ae973962d53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
@@ -535,7 +535,7 @@ void dcn20_timing_generator_init(struct optc *optc1)
 	optc1->min_h_blank = 32;
 	optc1->min_v_blank = 3;
 	optc1->min_v_blank_interlace = 5;
-	optc1->min_h_sync_width = 8;
+	optc1->min_h_sync_width = 4;//	Minimum HSYNC = 8 pixels asked By HW in the first place for no actual reason. Oculus Rift S will not light up with 8 as its hsyncWidth is 6. Changing it to 4 to fix that issue.
 	optc1->min_v_sync_width = 1;
 	optc1->comb_opp_id = 0xf;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index d200bc3cec71..b949e202d6cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -2643,6 +2643,10 @@ static void update_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_
 
 	if (dc->bb_overrides.min_dcfclk_mhz > 0)
 		min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
+	else
+		// Accounting for SOC/DCF relationship, we can go as high as
+		// 506Mhz in Vmin.  We need to code 507 since SMU will round down to 506.
+		min_dcfclk = 507;
 
 	for (i = 0; i < num_states; i++) {
 		int min_fclk_required_by_uclk;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
index 27679ef6ebe8..96c263223315 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/delay.h>
+
 #include "dcn20_vmid.h"
 #include "reg_helper.h"
 
@@ -36,6 +38,38 @@
 #define FN(reg_name, field_name) \
 	vmid->shifts->field_name, vmid->masks->field_name
 
+static void dcn20_wait_for_vmid_ready(struct dcn20_vmid *vmid)
+{
+	/* According to the hardware spec, we need to poll for the lowest
+	 * bit of PAGE_TABLE_BASE_ADDR_LO32 = 1 any time a GPUVM
+	 * context is updated. We can't use REG_WAIT here since we
+	 * don't have a separate field to wait on.
+	 *
+	 * TODO: Confirm timeout / poll interval with hardware team
+	 */
+
+	int max_times = 10000;
+	int delay_us  = 5;
+	int i;
+
+	for (i = 0; i < max_times; ++i) {
+		uint32_t entry_lo32;
+
+		REG_GET(PAGE_TABLE_BASE_ADDR_LO32,
+			VM_CONTEXT0_PAGE_DIRECTORY_ENTRY_LO32,
+			&entry_lo32);
+
+		if (entry_lo32 & 0x1)
+			return;
+
+		udelay(delay_us);
+	}
+
+	/* VM setup timed out */
+	DC_LOG_WARNING("Timeout while waiting for GPUVM context update\n");
+	ASSERT(0);
+}
+
 void dcn20_vmid_setup(struct dcn20_vmid *vmid, const struct dcn_vmid_page_table_config *config)
 {
 	REG_SET(PAGE_TABLE_START_ADDR_HI32, 0,
@@ -54,6 +88,9 @@ void dcn20_vmid_setup(struct dcn20_vmid *vmid, const struct dcn_vmid_page_table_
 
 	REG_SET(PAGE_TABLE_BASE_ADDR_HI32, 0,
 			VM_CONTEXT0_PAGE_DIRECTORY_ENTRY_HI32, (config->page_table_base_addr >> 32) & 0xFFFFFFFF);
+	/* Note: per hardware spec PAGE_TABLE_BASE_ADDR_LO32 must be programmed last in sequence */
 	REG_SET(PAGE_TABLE_BASE_ADDR_LO32, 0,
 			VM_CONTEXT0_PAGE_DIRECTORY_ENTRY_LO32, config->page_table_base_addr & 0xFFFFFFFF);
+
+	dcn20_wait_for_vmid_ready(vmid);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/drm_dsc_dc.c b/drivers/gpu/drm/amd/display/dc/dsc/drm_dsc_dc.c
index 67089765780b..340ef4d41ebd 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/drm_dsc_dc.c
+++ b/drivers/gpu/drm/amd/display/dc/dsc/drm_dsc_dc.c
@@ -377,6 +377,12 @@ int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg)
 	vdsc_cfg->rc_bits = (hrd_delay * vdsc_cfg->bits_per_pixel) / 16;
 	vdsc_cfg->initial_dec_delay = hrd_delay - vdsc_cfg->initial_xmit_delay;
 
+	/* As per DSC spec v1.2a recommendation: */
+	if (vdsc_cfg->native_420)
+		vdsc_cfg->second_line_offset_adj = 512;
+	else
+		vdsc_cfg->second_line_offset_adj = 0;
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_dsc_compute_rc_parameters);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index c89393c19232..a148ffde8b12 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -212,7 +212,7 @@ struct resource_pool {
 	struct clock_source *clock_sources[MAX_CLOCK_SOURCES];
 	unsigned int clk_src_count;
 
-	struct audio *audios[MAX_PIPES];
+	struct audio *audios[MAX_AUDIOS];
 	unsigned int audio_count;
 	struct audio_support audio_support;
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index 959f5b654611..9502478c4a1b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -61,8 +61,8 @@ enum dcn_hubbub_page_table_depth {
 };
 
 enum dcn_hubbub_page_table_block_size {
-	DCN_PAGE_TABLE_BLOCK_SIZE_4KB,
-	DCN_PAGE_TABLE_BLOCK_SIZE_64KB
+	DCN_PAGE_TABLE_BLOCK_SIZE_4KB = 0,
+	DCN_PAGE_TABLE_BLOCK_SIZE_64KB = 4,
 };
 
 struct dcn_hubbub_phys_addr_config {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index 8759ec03aede..f82365e2d03c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -34,6 +34,7 @@
  * Data types shared between different Virtual HW blocks
  ******************************************************************************/
 
+#define MAX_AUDIOS 7
 #define MAX_PIPES 6
 #if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 #define MAX_DWB_PIPES	1
diff --git a/drivers/gpu/drm/amd/display/include/dpcd_defs.h b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
index 1c66166d0a94..2c90d1b46c8b 100644
--- a/drivers/gpu/drm/amd/display/include/dpcd_defs.h
+++ b/drivers/gpu/drm/amd/display/include/dpcd_defs.h
@@ -43,7 +43,7 @@ enum dpcd_revision {
 enum dpcd_downstream_port_type {
 	DOWNSTREAM_DP = 0,
 	DOWNSTREAM_VGA,
-	DOWNSTREAM_DVI_HDMI,
+	DOWNSTREAM_DVI_HDMI_DP_PLUS_PLUS,/* DVI, HDMI, DP++ */
 	DOWNSTREAM_NONDDC /* has no EDID (TV,CV) */
 };
 
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index f1565c448de5..c097113c3976 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -137,12 +137,37 @@ int smu_get_dpm_freq_range(struct smu_context *smu, enum smu_clk_type clk_type,
 {
 	int ret = 0, clk_id = 0;
 	uint32_t param = 0;
+	uint32_t clock_limit;
 
 	if (!min && !max)
 		return -EINVAL;
 
-	if (!smu_clk_dpm_is_enabled(smu, clk_type))
+	if (!smu_clk_dpm_is_enabled(smu, clk_type)) {
+		switch (clk_type) {
+		case SMU_MCLK:
+		case SMU_UCLK:
+			clock_limit = smu->smu_table.boot_values.uclk;
+			break;
+		case SMU_GFXCLK:
+		case SMU_SCLK:
+			clock_limit = smu->smu_table.boot_values.gfxclk;
+			break;
+		case SMU_SOCCLK:
+			clock_limit = smu->smu_table.boot_values.socclk;
+			break;
+		default:
+			clock_limit = 0;
+			break;
+		}
+
+		/* clock in MHz unit */
+		if (min)
+			*min = clock_limit / 100;
+		if (max)
+			*max = clock_limit / 100;
+
 		return 0;
+	}
 
 	mutex_lock(&smu->mutex);
 	clk_id = smu_clk_get_index(smu, clk_type);
@@ -1349,13 +1374,49 @@ static int smu_enable_umd_pstate(void *handle,
 	return 0;
 }
 
+/*
+ * Generic handling of a forced DPM level, used when the ASIC-specific
+ * set_performance_level hook is missing or reports failure.
+ * Returns 0 on success or the error from the failing SMU helper.
+ */
+static int smu_default_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
+{
+	uint32_t sclk_bit, mclk_bit, soc_bit;
+	int err;
+
+	switch (level) {
+	case AMD_DPM_FORCED_LEVEL_HIGH:
+		return smu_force_dpm_limit_value(smu, true);
+	case AMD_DPM_FORCED_LEVEL_LOW:
+		return smu_force_dpm_limit_value(smu, false);
+	case AMD_DPM_FORCED_LEVEL_AUTO:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+		return smu_unforce_dpm_levels(smu);
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+		err = smu_get_profiling_clk_mask(smu, level, &sclk_bit,
+						 &mclk_bit, &soc_bit);
+		if (err)
+			return err;
+		smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_bit);
+		smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_bit);
+		smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_bit);
+		return 0;
+	case AMD_DPM_FORCED_LEVEL_MANUAL:
+	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
+	default:
+		/* Nothing to program for these levels. */
+		return 0;
+	}
+}
+
 int smu_adjust_power_state_dynamic(struct smu_context *smu,
 				   enum amd_dpm_forced_level level,
 				   bool skip_display_settings)
 {
 	int ret = 0;
 	int index = 0;
-	uint32_t sclk_mask, mclk_mask, soc_mask;
 	long workload;
 	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
 
@@ -1386,39 +1447,10 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 	}
 
 	if (smu_dpm_ctx->dpm_level != level) {
-		switch (level) {
-		case AMD_DPM_FORCED_LEVEL_HIGH:
-			ret = smu_force_dpm_limit_value(smu, true);
-			break;
-		case AMD_DPM_FORCED_LEVEL_LOW:
-			ret = smu_force_dpm_limit_value(smu, false);
-			break;
-
-		case AMD_DPM_FORCED_LEVEL_AUTO:
-		case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
-			ret = smu_unforce_dpm_levels(smu);
-			break;
-
-		case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
-		case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
-		case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
-			ret = smu_get_profiling_clk_mask(smu, level,
-							 &sclk_mask,
-							 &mclk_mask,
-							 &soc_mask);
-			if (ret)
-				return ret;
-			smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask);
-			smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask);
-			smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
-			break;
-
-		case AMD_DPM_FORCED_LEVEL_MANUAL:
-		case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
-		default:
-			break;
+		ret = smu_asic_set_performance_level(smu, level);
+		if (ret) {
+			ret = smu_default_set_performance_level(smu, level);
 		}
-
 		if (!ret)
 			smu_dpm_ctx->dpm_level = level;
 	}
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index 1af992fb0bde..22e46a289a16 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -613,6 +613,7 @@ struct pptable_funcs {
 	int (*tables_init)(struct smu_context *smu, struct smu_table *tables);
 	int (*set_thermal_fan_table)(struct smu_context *smu);
 	int (*get_fan_speed_percent)(struct smu_context *smu, uint32_t *speed);
+	int (*get_fan_speed_rpm)(struct smu_context *smu, uint32_t *speed);
 	int (*set_watermarks_table)(struct smu_context *smu, void *watermarks,
 				    struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges);
 	int (*get_current_clk_freq_by_table)(struct smu_context *smu,
@@ -621,6 +622,7 @@ struct pptable_funcs {
 	int (*get_thermal_temperature_range)(struct smu_context *smu, struct smu_temperature_range *range);
 	int (*get_uclk_dpm_states)(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states);
 	int (*set_default_od_settings)(struct smu_context *smu, bool initialize);
+	int (*set_performance_level)(struct smu_context *smu, enum amd_dpm_forced_level level);
 };
 
 struct smu_funcs
@@ -685,7 +687,6 @@ struct smu_funcs
 	int (*set_watermarks_for_clock_ranges)(struct smu_context *smu,
 					       struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges);
 	int (*conv_power_profile_to_pplib_workload)(int power_profile);
-	int (*get_current_rpm)(struct smu_context *smu, uint32_t *speed);
 	uint32_t (*get_fan_control_mode)(struct smu_context *smu);
 	int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode);
 	int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed);
@@ -751,8 +752,6 @@ struct smu_funcs
 	((smu)->funcs->init_max_sustainable_clocks ? (smu)->funcs->init_max_sustainable_clocks((smu)) : 0)
 #define smu_set_default_od_settings(smu, initialize) \
 	((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0)
-#define smu_get_current_rpm(smu, speed) \
-	((smu)->funcs->get_current_rpm ? (smu)->funcs->get_current_rpm((smu), (speed)) : 0)
 #define smu_set_fan_speed_rpm(smu, speed) \
 	((smu)->funcs->set_fan_speed_rpm ? (smu)->funcs->set_fan_speed_rpm((smu), (speed)) : 0)
 #define smu_send_smc_msg(smu, msg) \
@@ -841,6 +840,8 @@ struct smu_funcs
 	((smu)->ppt_funcs->get_fan_speed_percent ? (smu)->ppt_funcs->get_fan_speed_percent((smu), (speed)) : 0)
 #define smu_set_fan_speed_percent(smu, speed) \
 	((smu)->funcs->set_fan_speed_percent ? (smu)->funcs->set_fan_speed_percent((smu), (speed)) : 0)
+#define smu_get_fan_speed_rpm(smu, speed) \
+	((smu)->ppt_funcs->get_fan_speed_rpm ? (smu)->ppt_funcs->get_fan_speed_rpm((smu), (speed)) : 0)
 
 #define smu_msg_get_index(smu, msg) \
 	((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_msg_index? (smu)->ppt_funcs->get_smu_msg_index((smu), (msg)) : -EINVAL) : -EINVAL)
@@ -918,6 +919,9 @@ struct smu_funcs
 	((smu)->funcs->baco_get_state? (smu)->funcs->baco_get_state((smu), (state)) : 0)
 #define smu_baco_reset(smu) \
 	((smu)->funcs->baco_reset? (smu)->funcs->baco_reset((smu)) : 0)
+/* Call the per-ASIC set_performance_level hook; -EINVAL when the ASIC provides none. */
+#define smu_asic_set_performance_level(smu, level) \
+	((smu)->ppt_funcs->set_performance_level? (smu)->ppt_funcs->set_performance_level((smu), (level)) : -EINVAL)
 
 extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
 				   uint16_t *size, uint8_t *frev, uint8_t *crev,
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 2dae0ae0829e..4aaad255a288 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -626,11 +626,26 @@ static int navi10_get_current_clk_freq_by_table(struct smu_context *smu,
 	return ret;
 }
 
+/*
+ * Report whether the DPM descriptor of clk_type in the driver pptable
+ * selects fine-grained DPM (SnapToDiscrete == 0) rather than discrete
+ * DPM (SnapToDiscrete == 1).
+ */
+static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu_clk_type clk_type)
+{
+	PPTable_t *ppt = smu->smu_table.driver_pptable;
+	uint32_t idx = smu_clk_get_index(smu, clk_type);
+
+	return ppt->DpmDescriptor[idx].SnapToDiscrete == 0;
+}
+
 static int navi10_print_clk_levels(struct smu_context *smu,
 			enum smu_clk_type clk_type, char *buf)
 {
 	int i, size = 0, ret = 0;
 	uint32_t cur_value = 0, value = 0, count = 0;
+	uint32_t freq_values[3] = {0};
+	uint32_t mark_index = 0;
 
 	switch (clk_type) {
 	case SMU_GFXCLK:
@@ -643,22 +658,42 @@ static int navi10_print_clk_levels(struct smu_context *smu,
 		ret = smu_get_current_clk_freq(smu, clk_type, &cur_value);
 		if (ret)
 			return size;
+
 		/* 10KHz -> MHz */
 		cur_value = cur_value / 100;
 
-		size += sprintf(buf, "current clk: %uMhz\n", cur_value);
-
 		ret = smu_get_dpm_level_count(smu, clk_type, &count);
 		if (ret)
 			return size;
 
-		for (i = 0; i < count; i++) {
-			ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &value);
+		if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+			for (i = 0; i < count; i++) {
+				ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &value);
+				if (ret)
+					return size;
+
+				size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
+						cur_value == value ? "*" : "");
+			}
+		} else {
+			ret = smu_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]);
+			if (ret)
+				return size;
+			ret = smu_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]);
 			if (ret)
 				return size;
 
-			size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
-					cur_value == value ? "*" : "");
+			freq_values[1] = cur_value;
+			mark_index = cur_value == freq_values[0] ? 0 :
+				     cur_value == freq_values[2] ? 2 : 1;
+			if (mark_index != 1)
+				freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
+
+			for (i = 0; i < 3; i++) {
+				size += sprintf(buf + size, "%d: %uMhz %s\n", i, freq_values[i],
+						i == mark_index ? "*" : "");
+			}
+
 		}
 		break;
 	default:
@@ -919,12 +954,13 @@ static bool navi10_is_dpm_running(struct smu_context *smu)
 	return !!(feature_enabled & SMC_DPM_FEATURE);
 }
 
-static int navi10_get_fan_speed(struct smu_context *smu, uint16_t *value)
+static int navi10_get_fan_speed_rpm(struct smu_context *smu,
+				    uint32_t *speed)
 {
 	SmuMetrics_t metrics;
 	int ret = 0;
 
-	if (!value)
+	if (!speed)
 		return -EINVAL;
 
 	memset(&metrics, 0, sizeof(metrics));
@@ -934,7 +970,7 @@ static int navi10_get_fan_speed(struct smu_context *smu, uint16_t *value)
 	if (ret)
 		return ret;
 
-	*value = metrics.CurrFanSpeed;
+	*speed = metrics.CurrFanSpeed;
 
 	return ret;
 }
@@ -944,10 +980,10 @@ static int navi10_get_fan_speed_percent(struct smu_context *smu,
 {
 	int ret = 0;
 	uint32_t percent = 0;
-	uint16_t current_rpm;
+	uint32_t current_rpm;
 	PPTable_t *pptable = smu->smu_table.driver_pptable;
 
-	ret = navi10_get_fan_speed(smu, &current_rpm);
+	ret = navi10_get_fan_speed_rpm(smu, &current_rpm);
 	if (ret)
 		return ret;
 
@@ -1530,6 +1566,60 @@ static int navi10_set_ppfeature_status(struct smu_context *smu,
 	return 0;
 }
 
+/*
+ * Pin SCLK to the rev_id-specific peak and UCLK to its last DPM level.
+ */
+static int navi10_set_peak_clock_by_device(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t peak_sclk = 0, peak_uclk = 0;
+	uint32_t uclk_levels = 0;
+	int err;
+
+	switch (adev->rev_id) {
+	case 0xf0: /* XTX */
+	case 0xc0:
+		peak_sclk = NAVI10_PEAK_SCLK_XTX;
+		break;
+	case 0xf1: /* XT */
+	case 0xc1:
+		peak_sclk = NAVI10_PEAK_SCLK_XT;
+		break;
+	default: /* XL */
+		peak_sclk = NAVI10_PEAK_SCLK_XL;
+		break;
+	}
+
+	err = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_levels);
+	if (err)
+		return err;
+	err = smu_get_dpm_freq_by_index(smu, SMU_UCLK, uclk_levels - 1, &peak_uclk);
+	if (err)
+		return err;
+
+	err = smu_set_soft_freq_range(smu, SMU_SCLK, peak_sclk, peak_sclk);
+	if (err)
+		return err;
+
+	return smu_set_soft_freq_range(smu, SMU_UCLK, peak_uclk, peak_uclk);
+}
+
+/*
+ * ASIC-specific performance-level hook for Navi10, registered as
+ * .set_performance_level in navi10_ppt_funcs.
+ *
+ * Only AMD_DPM_FORCED_LEVEL_PROFILE_PEAK is handled, by delegating to
+ * navi10_set_peak_clock_by_device() and returning its result.  Every
+ * other level is rejected with -EINVAL.
+ */
+static int navi10_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
+{
+	if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+		return navi10_set_peak_clock_by_device(smu);
+
+	return -EINVAL;
+}
+
 static const struct pptable_funcs navi10_ppt_funcs = {
 	.tables_init = navi10_tables_init,
 	.alloc_dpm_context = navi10_allocate_dpm_context,
@@ -1557,6 +1647,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.unforce_dpm_levels = navi10_unforce_dpm_levels,
 	.is_dpm_running = navi10_is_dpm_running,
 	.get_fan_speed_percent = navi10_get_fan_speed_percent,
+	.get_fan_speed_rpm = navi10_get_fan_speed_rpm,
 	.get_power_profile_mode = navi10_get_power_profile_mode,
 	.set_power_profile_mode = navi10_set_power_profile_mode,
 	.get_profiling_clk_mask = navi10_get_profiling_clk_mask,
@@ -1565,6 +1656,7 @@ static const struct pptable_funcs navi10_ppt_funcs = {
 	.get_uclk_dpm_states = navi10_get_uclk_dpm_states,
 	.get_ppfeature_status = navi10_get_ppfeature_status,
 	.set_ppfeature_status = navi10_set_ppfeature_status,
+	.set_performance_level = navi10_set_performance_level,
 };
 
 void navi10_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
index 957288e22f47..620ff17c2fef 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h
@@ -23,6 +23,10 @@
 #ifndef __NAVI10_PPT_H__
 #define __NAVI10_PPT_H__
 
+#define NAVI10_PEAK_SCLK_XTX		(1830)	/* rev_id 0xf0, 0xc0 */
+#define NAVI10_PEAK_SCLK_XT		(1755)	/* rev_id 0xf1, 0xc1 */
+#define NAVI10_PEAK_SCLK_XL		(1625)	/* every other rev_id */
+
 extern void navi10_set_ppt_funcs(struct smu_context *smu);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 95c7c4dae523..caca9091bfcc 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -1371,23 +1371,6 @@ static int smu_v11_0_gfx_off_control(struct smu_context *smu, bool enable)
 	return ret;
 }
 
-static int smu_v11_0_get_current_rpm(struct smu_context *smu,
-				     uint32_t *current_rpm)
-{
-	int ret;
-
-	ret = smu_send_smc_msg(smu, SMU_MSG_GetCurrentRpm);
-
-	if (ret) {
-		pr_err("Attempt to get current RPM from SMC Failed!\n");
-		return ret;
-	}
-
-	smu_read_smc_arg(smu, current_rpm);
-
-	return 0;
-}
-
 static uint32_t
 smu_v11_0_get_fan_control_mode(struct smu_context *smu)
 {
@@ -1773,7 +1756,6 @@ static const struct smu_funcs smu_v11_0_funcs = {
 	.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
 	.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
 	.set_watermarks_for_clock_ranges = smu_v11_0_set_watermarks_for_clock_ranges,
-	.get_current_rpm = smu_v11_0_get_current_rpm,
 	.get_fan_control_mode = smu_v11_0_get_fan_control_mode,
 	.set_fan_control_mode = smu_v11_0_set_fan_control_mode,
 	.set_fan_speed_percent = smu_v11_0_set_fan_speed_percent,
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index bb9bb09cfc7a..dc139a6feeb1 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -3015,6 +3015,23 @@ static int vega20_set_thermal_fan_table(struct smu_context *smu)
 	return ret;
 }
 
+/*
+ * Ask the SMC for the current fan RPM and read the reply into *speed.
+ * Returns 0 on success or the smu_send_smc_msg() error code.
+ */
+static int vega20_get_fan_speed_rpm(struct smu_context *smu,
+				    uint32_t *speed)
+{
+	int err = smu_send_smc_msg(smu, SMU_MSG_GetCurrentRpm);
+
+	if (err) {
+		pr_err("Attempt to get current RPM from SMC Failed!\n");
+		return err;
+	}
+	smu_read_smc_arg(smu, speed);
+	return 0;
+}
+
 static int vega20_get_fan_speed_percent(struct smu_context *smu,
 					uint32_t *speed)
 {
@@ -3022,7 +3039,7 @@ static int vega20_get_fan_speed_percent(struct smu_context *smu,
 	uint32_t current_rpm = 0, percent = 0;
 	PPTable_t *pptable = smu->smu_table.driver_pptable;
 
-	ret = smu_get_current_rpm(smu, &current_rpm);
+	ret = vega20_get_fan_speed_rpm(smu, &current_rpm);
 	if (ret)
 		return ret;
 
@@ -3293,6 +3310,7 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.is_dpm_running = vega20_is_dpm_running,
 	.set_thermal_fan_table = vega20_set_thermal_fan_table,
 	.get_fan_speed_percent = vega20_get_fan_speed_percent,
+	.get_fan_speed_rpm = vega20_get_fan_speed_rpm,
 	.set_watermarks_table = vega20_set_watermarks_table,
 	.get_thermal_temperature_range = vega20_get_thermal_temperature_range
 };
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 56d36779d213..c8922b7cac09 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -859,7 +859,7 @@ bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation)
 	 * simple XOR between the two handle the addition nicely.
 	 */
 	cmdline = &connector->cmdline_mode;
-	if (cmdline->specified) {
+	if (cmdline->specified && cmdline->rotation_reflection) {
 		unsigned int cmdline_rest, panel_rest;
 		unsigned int cmdline_rot, panel_rot;
 		unsigned int sum_rot, sum_rest;
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 0b72468e8131..57564318ceea 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -835,7 +835,7 @@ static int atomic_remove_fb(struct drm_framebuffer *fb)
 	struct drm_device *dev = fb->dev;
 	struct drm_atomic_state *state;
 	struct drm_plane *plane;
-	struct drm_connector *conn;
+	struct drm_connector *conn __maybe_unused;
 	struct drm_connector_state *conn_state;
 	int i, ret;
 	unsigned plane_mask;
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 74a5739df506..80fcd5dc1558 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1686,7 +1686,7 @@ static int drm_mode_parse_cmdline_options(char *str, size_t len,
  *
  * Additionals options can be provided following the mode, using a comma to
  * separate each option. Valid options can be found in
- * Documentation/fb/modedb.txt.
+ * Documentation/fb/modedb.rst.
  *
  * The intermediate drm_cmdline_mode structure is required to store additional
  * options from the command line modline like the force-enable/disable flag.
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 0e2f74163a16..0aa8a12c9952 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -2221,8 +2221,6 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc,
 	if (ret)
 		goto fail;
 
-	spin_lock_init(&dpu_enc->enc_spinlock);
-
 	atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
 	timer_setup(&dpu_enc->frame_done_timer,
 			dpu_encoder_frame_done_timeout, 0);
@@ -2276,6 +2274,7 @@ struct drm_encoder *dpu_encoder_init(struct drm_device *dev,
 
 	drm_encoder_helper_add(&dpu_enc->base, &dpu_encoder_helper_funcs);
 
+	spin_lock_init(&dpu_enc->enc_spinlock);
 	dpu_enc->enabled = false;
 
 	return &dpu_enc->base;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ab64ab470de7..c226156f2dea 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -619,7 +619,7 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
 
 	msm_submitqueue_init(dev, ctx);
 
-	ctx->aspace = priv->gpu->aspace;
+	ctx->aspace = priv->gpu ? priv->gpu->aspace : NULL;
 	file->driver_priv = ctx;
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 8b78554cfde3..c2114c748c2f 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -97,7 +97,7 @@ static struct page **get_pages(struct drm_gem_object *obj)
 		 * because display controller, GPU, etc. are not coherent:
 		 */
 		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-			dma_map_sg(dev->dev, msm_obj->sgt->sgl,
+			dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
 					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
 	}
 
@@ -127,7 +127,7 @@ static void put_pages(struct drm_gem_object *obj)
 			 * GPU, etc. are not coherent:
 			 */
 			if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
-				dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
+				dma_sync_sg_for_cpu(obj->dev->dev, msm_obj->sgt->sgl,
 					     msm_obj->sgt->nents,
 					     DMA_BIDIRECTIONAL);
 
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index d594f7520b7b..7d78e6deac89 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -285,9 +285,13 @@ static int ttm_set_pages_caching(struct dma_pool *pool,
 
 static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
 {
+	unsigned long attrs = 0;
 	dma_addr_t dma = d_page->dma;
 	d_page->vaddr &= ~VADDR_FLAG_HUGE_POOL;
-	dma_free_coherent(pool->dev, pool->size, (void *)d_page->vaddr, dma);
+	if (pool->type & IS_HUGE)
+		attrs = DMA_ATTR_NO_WARN;
+
+	dma_free_attrs(pool->dev, pool->size, (void *)d_page->vaddr, dma, attrs);
 
 	kfree(d_page);
 	d_page = NULL;
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index e7dff5febe16..d42bc0883a32 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -852,7 +852,7 @@ static const u16 NCT6106_REG_TARGET[] = { 0x111, 0x121, 0x131 };
 static const u16 NCT6106_REG_WEIGHT_TEMP_SEL[] = { 0x168, 0x178, 0x188 };
 static const u16 NCT6106_REG_WEIGHT_TEMP_STEP[] = { 0x169, 0x179, 0x189 };
 static const u16 NCT6106_REG_WEIGHT_TEMP_STEP_TOL[] = { 0x16a, 0x17a, 0x18a };
-static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x17c };
+static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x18b };
 static const u16 NCT6106_REG_WEIGHT_TEMP_BASE[] = { 0x16c, 0x17c, 0x18c };
 static const u16 NCT6106_REG_WEIGHT_DUTY_BASE[] = { 0x16d, 0x17d, 0x18d };
 
@@ -3764,6 +3764,7 @@ static int nct6775_probe(struct platform_device *pdev)
 		data->REG_FAN_TIME[0] = NCT6106_REG_FAN_STOP_TIME;
 		data->REG_FAN_TIME[1] = NCT6106_REG_FAN_STEP_UP_TIME;
 		data->REG_FAN_TIME[2] = NCT6106_REG_FAN_STEP_DOWN_TIME;
+		data->REG_TOLERANCE_H = NCT6106_REG_TOLERANCE_H;
 		data->REG_PWM[0] = NCT6106_REG_PWM;
 		data->REG_PWM[1] = NCT6106_REG_FAN_START_OUTPUT;
 		data->REG_PWM[2] = NCT6106_REG_FAN_STOP_OUTPUT;
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index a7d2b16dd702..30e18eb60da7 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -408,8 +408,10 @@ static ssize_t occ_show_power_1(struct device *dev,
 
 static u64 occ_get_powr_avg(u64 *accum, u32 *samples)
 {
-	return div64_u64(get_unaligned_be64(accum) * 1000000ULL,
-			 get_unaligned_be32(samples));
+	u64 n = get_unaligned_be32(samples);
+
+	/* a sample count of zero yields 0 instead of a division by zero */
+	return n ? div64_u64(get_unaligned_be64(accum) * 1000000ULL, n) : 0;
 }
 
 static ssize_t occ_show_power_2(struct device *dev,
diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
index cfc76b5de726..5a1235fd86bb 100644
--- a/drivers/i2c/busses/i2c-nvidia-gpu.c
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -364,7 +364,7 @@ static void gpu_i2c_remove(struct pci_dev *pdev)
 /*
  * We need gpu_i2c_suspend() even if it is stub, for runtime pm to work
  * correctly. Without it, lspci shows runtime pm status as "D0" for the card.
- * Documentation/power/pci.txt also insists for driver to provide this.
+ * Documentation/power/pci.rst also insists for driver to provide this.
  */
 static __maybe_unused int gpu_i2c_suspend(struct device *dev)
 {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index c7a3d75fb308..2e72fc5af157 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -611,6 +611,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 	struct Scsi_Host *shost;
 	struct iser_conn *iser_conn = NULL;
 	struct ib_conn *ib_conn;
+	struct ib_device *ib_dev;
 	u32 max_fr_sectors;
 
 	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
@@ -641,16 +642,19 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 		}
 
 		ib_conn = &iser_conn->ib_conn;
+		ib_dev = ib_conn->device->ib_device;
 		if (ib_conn->pi_support) {
-			u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
+			u32 sig_caps = ib_dev->attrs.sig_prot_cap;
 
 			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
 			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
 						   SHOST_DIX_GUARD_CRC);
 		}
 
-		if (iscsi_host_add(shost,
-				   ib_conn->device->ib_device->dev.parent)) {
+		if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+			shost->virt_boundary_mask = ~MASK_4K;
+
+		if (iscsi_host_add(shost, ib_dev->dev.parent)) {
 			mutex_unlock(&iser_conn->state_mutex);
 			goto free_host;
 		}
@@ -956,30 +960,6 @@ static umode_t iser_attr_is_visible(int param_type, int param)
 	return 0;
 }
 
-static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
-{
-	struct iscsi_session *session;
-	struct iser_conn *iser_conn;
-	struct ib_device *ib_dev;
-
-	mutex_lock(&unbind_iser_conn_mutex);
-
-	session = starget_to_session(scsi_target(sdev))->dd_data;
-	iser_conn = session->leadconn->dd_data;
-	if (!iser_conn) {
-		mutex_unlock(&unbind_iser_conn_mutex);
-		return -ENOTCONN;
-	}
-	ib_dev = iser_conn->ib_conn.device->ib_device;
-
-	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
-		blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
-
-	mutex_unlock(&unbind_iser_conn_mutex);
-
-	return 0;
-}
-
 static struct scsi_host_template iscsi_iser_sht = {
 	.module                 = THIS_MODULE,
 	.name                   = "iSCSI Initiator over iSER",
@@ -992,7 +972,6 @@ static struct scsi_host_template iscsi_iser_sht = {
 	.eh_device_reset_handler= iscsi_eh_device_reset,
 	.eh_target_reset_handler = iscsi_eh_recover_target,
 	.target_alloc		= iscsi_target_alloc,
-	.slave_alloc            = iscsi_iser_slave_alloc,
 	.proc_name              = "iscsi_iser",
 	.this_id                = -1,
 	.track_queue_depth	= 1,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index c7bd96edce80..b5960351bec0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3046,20 +3046,6 @@ static int srp_target_alloc(struct scsi_target *starget)
 	return 0;
 }
 
-static int srp_slave_alloc(struct scsi_device *sdev)
-{
-	struct Scsi_Host *shost = sdev->host;
-	struct srp_target_port *target = host_to_target(shost);
-	struct srp_device *srp_dev = target->srp_host->srp_dev;
-	struct ib_device *ibdev = srp_dev->dev;
-
-	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
-		blk_queue_virt_boundary(sdev->request_queue,
-					~srp_dev->mr_page_mask);
-
-	return 0;
-}
-
 static int srp_slave_configure(struct scsi_device *sdev)
 {
 	struct Scsi_Host *shost = sdev->host;
@@ -3262,7 +3248,6 @@ static struct scsi_host_template srp_template = {
 	.name				= "InfiniBand SRP initiator",
 	.proc_name			= DRV_NAME,
 	.target_alloc			= srp_target_alloc,
-	.slave_alloc			= srp_slave_alloc,
 	.slave_configure		= srp_slave_configure,
 	.info				= srp_target_info,
 	.queuecommand			= srp_queuecommand,
@@ -3806,6 +3791,9 @@ static ssize_t srp_create_target(struct device *dev,
 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
 	target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
 
+	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+		target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
+
 	target = host_to_target(target_host);
 
 	target->net		= kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
diff --git a/drivers/input/joystick/iforce/iforce-ff.c b/drivers/input/joystick/iforce/iforce-ff.c
index 4cadebd8b9c4..95c0348843e6 100644
--- a/drivers/input/joystick/iforce/iforce-ff.c
+++ b/drivers/input/joystick/iforce/iforce-ff.c
@@ -6,9 +6,6 @@
  *  USB/RS232 I-Force joysticks and wheels.
  */
 
-/*
- */
-
 #include "iforce.h"
 
 /*
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index 9a5f90da06ec..b2a68bc9f0b4 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -6,9 +6,6 @@
  *  USB/RS232 I-Force joysticks and wheels.
  */
 
-/*
- */
-
 #include <asm/unaligned.h>
 #include "iforce.h"
 
diff --git a/drivers/input/joystick/iforce/iforce-packets.c b/drivers/input/joystick/iforce/iforce-packets.c
index b313e38b2c3a..763642c8cee9 100644
--- a/drivers/input/joystick/iforce/iforce-packets.c
+++ b/drivers/input/joystick/iforce/iforce-packets.c
@@ -6,9 +6,6 @@
  *  USB/RS232 I-Force joysticks and wheels.
  */
 
-/*
- */
-
 #include <asm/unaligned.h>
 #include "iforce.h"
 
diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c
index bbe31e0b759f..f95a81b9fac7 100644
--- a/drivers/input/joystick/iforce/iforce-serio.c
+++ b/drivers/input/joystick/iforce/iforce-serio.c
@@ -6,9 +6,6 @@
  *  USB/RS232 I-Force joysticks and wheels.
  */
 
-/*
- */
-
 #include <linux/serio.h>
 #include "iforce.h"
 
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index ade376bfb79f..29abfeeef9a5 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -6,9 +6,6 @@
  *  USB/RS232 I-Force joysticks and wheels.
  */
 
-/*
- */
-
 #include <linux/usb.h>
 #include "iforce.h"
 
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 9cfa460466aa..6aa761ebbdf7 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -6,9 +6,6 @@
  *  USB/RS232 I-Force joysticks and wheels.
  */
 
-/*
- */
-
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/input.h>
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 7c4f19dab34f..8e9c3ea9d5e7 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -71,6 +71,22 @@ config KEYBOARD_AMIGA
 config ATARI_KBD_CORE
 	bool
 
+config KEYBOARD_APPLESPI
+	tristate "Apple SPI keyboard and trackpad"
+	depends on ACPI && EFI
+	depends on SPI
+	depends on X86 || COMPILE_TEST
+	help
+	  Say Y here if you are running Linux on any Apple MacBook8,1 or later,
+	  or any MacBookPro13,* or MacBookPro14,*.
+
+	  You will also need to enable appropriate SPI master controllers:
+	  spi_pxa2xx_platform and spi_pxa2xx_pci for MacBook8,1, and
+	  spi_pxa2xx_platform and intel_lpss_pci for the rest.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called applespi.
+
 config KEYBOARD_ATARI
 	tristate "Atari keyboard"
 	depends on ATARI
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index f0291ca39f62..06a0af6efeae 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_KEYBOARD_ADP5520)		+= adp5520-keys.o
 obj-$(CONFIG_KEYBOARD_ADP5588)		+= adp5588-keys.o
 obj-$(CONFIG_KEYBOARD_ADP5589)		+= adp5589-keys.o
 obj-$(CONFIG_KEYBOARD_AMIGA)		+= amikbd.o
+obj-$(CONFIG_KEYBOARD_APPLESPI)		+= applespi.o
 obj-$(CONFIG_KEYBOARD_ATARI)		+= atakbd.o
 obj-$(CONFIG_KEYBOARD_ATKBD)		+= atkbd.o
 obj-$(CONFIG_KEYBOARD_BCM)		+= bcm-keypad.o
diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c
index 4c05c70a8cf3..4f96a4a99e5b 100644
--- a/drivers/input/keyboard/adp5589-keys.c
+++ b/drivers/input/keyboard/adp5589-keys.c
@@ -505,6 +505,7 @@ static int adp5589_gpio_add(struct adp5589_kpad *kpad)
 	if (!gpio_data)
 		return 0;
 
+	kpad->gc.parent = dev;
 	kpad->gc.ngpio = adp5589_build_gpiomap(kpad, pdata);
 	if (kpad->gc.ngpio == 0) {
 		dev_info(dev, "No unused gpios left to export\n");
diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c
new file mode 100644
index 000000000000..548737e7aeda
--- /dev/null
+++ b/drivers/input/keyboard/applespi.c
@@ -0,0 +1,1977 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2018 Federico Lorenzi
+ * Copyright (c) 2017-2018 Ronald Tschalär
+ */
+
+/*
+ * The keyboard and touchpad controller on the MacBookAir6, MacBookPro12,
+ * MacBook8 and newer can be driven either by USB or SPI. However the USB
+ * pins are only connected on the MacBookAir6 and 7 and the MacBookPro12.
+ * All others need this driver. The interface is selected using ACPI methods:
+ *
+ * * UIEN ("USB Interface Enable"): If invoked with argument 1, disables SPI
+ *   and enables USB. If invoked with argument 0, disables USB.
+ * * UIST ("USB Interface Status"): Returns 1 if USB is enabled, 0 otherwise.
+ * * SIEN ("SPI Interface Enable"): If invoked with argument 1, disables USB
+ *   and enables SPI. If invoked with argument 0, disables SPI.
+ * * SIST ("SPI Interface Status"): Returns 1 if SPI is enabled, 0 otherwise.
+ * * ISOL: Resets the four GPIO pins used for SPI. Intended to be invoked with
+ *   argument 1, then once more with argument 0.
+ *
+ * UIEN and UIST are only provided on models where the USB pins are connected.
+ *
+ * SPI-based Protocol
+ * ------------------
+ *
+ * The device and driver exchange messages (struct message); each message is
+ * encapsulated in one or more packets (struct spi_packet). There are two types
+ * of exchanges: reads, and writes. A read is signaled by a GPE, upon which one
+ * message can be read from the device. A write exchange consists of writing a
+ * command message, immediately reading a short status packet, and then, upon
+ * receiving a GPE, reading the response message. Write exchanges cannot be
+ * interleaved, i.e. a new write exchange must not be started till the previous
+ * write exchange is complete. Whether a received message is part of a read or
+ * write exchange is indicated in the encapsulating packet's flags field.
+ *
+ * A single message may be too large to fit in a single packet (which has a
+ * fixed, 256-byte size). In that case it will be split over multiple,
+ * consecutive packets.
+ */
+
+#include <linux/acpi.h>
+#include <linux/crc16.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/efi.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/spi/spi.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include <asm/barrier.h>
+#include <asm/unaligned.h>
+
+#define CREATE_TRACE_POINTS
+#include "applespi.h"
+#include "applespi_trace.h"
+
+/*
+ * Sizes in bytes: a full spi packet, and the short status that is read back
+ * right after a packet has been written.
+ */
+#define APPLESPI_PACKET_SIZE	256
+#define APPLESPI_STATUS_SIZE	4
+
+/* values of spi_packet.flags (PACKET_TYPE_*) and spi_packet.device (PACKET_DEV_*) */
+#define PACKET_TYPE_READ	0x20
+#define PACKET_TYPE_WRITE	0x40
+#define PACKET_DEV_KEYB		0x01
+#define PACKET_DEV_TPAD		0x02
+#define PACKET_DEV_INFO		0xd0
+
+/* number of entries in keyboard_protocol.keys_pressed */
+#define MAX_ROLLOVER		6
+
+/* MAX_FINGERS sizes the pos[] and slots[] arrays in struct applespi_data */
+#define MAX_FINGERS		11
+#define MAX_FINGER_ORIENTATION	16384
+#define MAX_PKTS_PER_MSG	2
+
+/*
+ * Keyboard backlight scaling, see applespi_set_bl_level(): a non-zero
+ * brightness is multiplied by KBD_BL_LEVEL_ADJ, divided by KBD_BL_LEVEL_SCALE
+ * and offset by KBD_BL_LEVEL_MIN.
+ */
+#define KBD_BL_LEVEL_MIN	32U
+#define KBD_BL_LEVEL_MAX	255U
+#define KBD_BL_LEVEL_SCALE	1000000U
+#define KBD_BL_LEVEL_ADJ	\
+	((KBD_BL_LEVEL_MAX - KBD_BL_LEVEL_MIN) * KBD_BL_LEVEL_SCALE / 255U)
+
+#define EFI_BL_LEVEL_NAME	L"KeyboardBacklightLevel"
+#define EFI_BL_LEVEL_GUID	EFI_GUID(0xa076d2af, 0x9678, 0x4386, 0x8b, 0x58, 0x1f, 0xc8, 0xef, 0x04, 0x16, 0x19)
+
+/* bit in applespi_key_translation.flags marking the F1..F12 translations */
+#define APPLE_FLAG_FKEY		0x01
+
+#define SPI_RW_CHG_DELAY_US	100	/* from experimentation, in µs */
+
+#define SYNAPTICS_VENDOR_ID	0x06cb
+
+/* consulted by applespi_code_to_key() and applespi_translate_fn_key() */
+static unsigned int fnmode = 1;
+module_param(fnmode, uint, 0644);
+MODULE_PARM_DESC(fnmode, "Mode of Fn key on Apple keyboards (0 = disabled, [1] = fkeyslast, 2 = fkeysfirst)");
+
+/* 1-based index into applespi_controlcodes, see applespi_remap_fn_key() */
+static unsigned int fnremap;
+module_param(fnremap, uint, 0644);
+MODULE_PARM_DESC(fnremap, "Remap Fn key ([0] = no-remap; 1 = left-ctrl, 2 = left-shift, 3 = left-alt, 4 = left-meta, 6 = right-shift, 7 = right-alt, 8 = right-meta)");
+
+/* when set, applespi_code_to_key() applies the apple_iso_keyboard table */
+static bool iso_layout;
+module_param(iso_layout, bool, 0644);
+MODULE_PARM_DESC(iso_layout, "Enable/Disable hardcoded ISO-layout of the keyboard. ([0] = disabled, 1 = enabled)");
+
+/* read-only (0444) string parameter; the expected format is in the description */
+static char touchpad_dimensions[40];
+module_param_string(touchpad_dimensions, touchpad_dimensions,
+		    sizeof(touchpad_dimensions), 0444);
+MODULE_PARM_DESC(touchpad_dimensions, "The pixel dimensions of the touchpad, as XxY+W+H .");
+
+/**
+ * struct keyboard_protocol - keyboard message.
+ * message.type = 0x0110, message.length = 0x000a
+ *
+ * @unknown1:		unknown
+ * @modifiers:		bit-set of modifier/control keys pressed; one bit per
+ *			entry of applespi_controlcodes
+ * @unknown2:		unknown
+ * @keys_pressed:	the (non-modifier) keys currently pressed; at most
+ *			MAX_ROLLOVER of them
+ * @fn_pressed:		whether the fn key is currently pressed
+ * @crc16:		crc over the whole message struct (message header +
+ *			this struct) minus this @crc16 field
+ */
+struct keyboard_protocol {
+	u8			unknown1;
+	u8			modifiers;
+	u8			unknown2;
+	u8			keys_pressed[MAX_ROLLOVER];
+	u8			fn_pressed;
+	__le16			crc16;
+};
+
+/**
+ * struct tp_finger - single trackpad finger structure, le16-aligned
+ *
+ * @origin:		zero when switching track finger
+ * @abs_x:		absolute x coordinate
+ * @abs_y:		absolute y coordinate
+ * @rel_x:		relative x coordinate
+ * @rel_y:		relative y coordinate
+ * @tool_major:		tool area, major axis
+ * @tool_minor:		tool area, minor axis
+ * @orientation:	16384 when point, else 15 bit angle
+ * @touch_major:	touch area, major axis
+ * @touch_minor:	touch area, minor axis
+ * @unused:		zeros
+ * @pressure:		pressure on forcetouch touchpad
+ * @multi:		one finger: varies, more fingers: constant
+ * @crc16:		on last finger: crc over the whole message struct
+ *			(i.e. message header + this struct) minus the last
+ *			@crc16 field; unknown on all other fingers.
+ *
+ * All fields are little-endian on the wire; the driver converts them with
+ * le16_to_int(), i.e. interprets them as signed 16-bit values.
+ */
+struct tp_finger {
+	__le16 origin;
+	__le16 abs_x;
+	__le16 abs_y;
+	__le16 rel_x;
+	__le16 rel_y;
+	__le16 tool_major;
+	__le16 tool_minor;
+	__le16 orientation;
+	__le16 touch_major;
+	__le16 touch_minor;
+	__le16 unused[2];
+	__le16 pressure;
+	__le16 multi;
+	__le16 crc16;
+};
+
+/**
+ * struct touchpad_protocol - touchpad message.
+ * message.type = 0x0210
+ *
+ * @unknown1:		unknown
+ * @clicked:		1 if a button-click was detected, 0 otherwise
+ * @unknown2:		unknown
+ * @number_of_fingers:	the number of fingers being reported in @fingers
+ * @clicked2:		same as @clicked
+ * @unknown3:		unknown
+ * @fingers:		the data for each finger; trailing variable-length
+ *			array holding @number_of_fingers entries
+ */
+struct touchpad_protocol {
+	u8			unknown1[1];
+	u8			clicked;
+	u8			unknown2[28];
+	u8			number_of_fingers;
+	u8			clicked2;
+	u8			unknown3[16];
+	struct tp_finger	fingers[0];
+};
+
+/**
+ * struct command_protocol_tp_info - get touchpad info.
+ * message.type = 0x1020, message.length = 0x0000
+ *
+ * The command has no payload of its own (hence the message.length of 0); it
+ * consists of just the trailing crc.
+ *
+ * @crc16:		crc over the whole message struct (message header +
+ *			this struct) minus this @crc16 field
+ */
+struct command_protocol_tp_info {
+	__le16			crc16;
+};
+
+/**
+ * struct touchpad_info_protocol - touchpad info response.
+ * message.type = 0x1020, message.length = 0x006e
+ *
+ * @unknown1:		unknown
+ * @model_flags:	flags (vary by model number, but significance otherwise
+ *			unknown)
+ * @model_no:		the touchpad model number
+ * @unknown2:		unknown
+ * @crc16:		crc over the whole message struct (message header +
+ *			this struct) minus this @crc16 field
+ */
+struct touchpad_info_protocol {
+	u8			unknown1[105];
+	u8			model_flags;
+	u8			model_no;
+	u8			unknown2[3];
+	__le16			crc16;
+};
+
+/**
+ * struct command_protocol_mt_init - initialize multitouch.
+ * message.type = 0x0252, message.length = 0x0002
+ *
+ * Built in applespi_send_cmd_msg() and addressed to PACKET_DEV_TPAD.
+ *
+ * @cmd:		value: 0x0102
+ * @crc16:		crc over the whole message struct (message header +
+ *			this struct) minus this @crc16 field
+ */
+struct command_protocol_mt_init {
+	__le16			cmd;
+	__le16			crc16;
+};
+
+/**
+ * struct command_protocol_capsl - toggle caps-lock led
+ * message.type = 0x0151, message.length = 0x0002
+ *
+ * Built in applespi_send_cmd_msg() and addressed to PACKET_DEV_KEYB.
+ *
+ * @unknown:		value: 0x01 (length?)
+ * @led:		0 off, 2 on
+ * @crc16:		crc over the whole message struct (message header +
+ *			this struct) minus this @crc16 field
+ */
+struct command_protocol_capsl {
+	u8			unknown;
+	u8			led;
+	__le16			crc16;
+};
+
+/**
+ * struct command_protocol_bl - set keyboard backlight brightness
+ * message.type = 0xB051, message.length = 0x0006
+ *
+ * Built in applespi_send_cmd_msg() and addressed to PACKET_DEV_KEYB.
+ *
+ * @const1:		value: 0x01B0
+ * @level:		the brightness level to set
+ * @const2:		value: 0x0001 (backlight off), 0x01F4 (backlight on)
+ * @crc16:		crc over the whole message struct (message header +
+ *			this struct) minus this @crc16 field
+ */
+struct command_protocol_bl {
+	__le16			const1;
+	__le16			level;
+	__le16			const2;
+	__le16			crc16;
+};
+
+/**
+ * struct message - a complete spi message.
+ *
+ * Each message begins with fixed header, followed by a message-type specific
+ * payload, and ends with a 16-bit crc. Because of the varying lengths of the
+ * payload, the crc is defined at the end of each payload struct, rather than
+ * in this struct.
+ *
+ * @type:	the message type
+ * @zero:	always 0
+ * @counter:	incremented on each message, rolls over after 255; there is a
+ *		separate counter for each message type.
+ * @rsp_buf_len: response buffer length (the exact nature of this field is
+ *		quite speculative). On a request/write this is often the same
+ *		as @length, though in some cases it has been seen to be much
+ *		larger (e.g. 0x400); on a response/read this is the same as on
+ *		the request; for reads that are not responses it is 0.
+ * @length:	length of the remainder of the data in the whole message
+ *		structure (after re-assembly in case of being split over
+ *		multiple spi-packets), minus the trailing crc. The total size
+ *		of the message struct is therefore @length + 10.
+ */
+struct message {
+	__le16		type;
+	u8		zero;
+	u8		counter;
+	__le16		rsp_buf_len;
+	__le16		length;
+	/* message-type specific payload; @data gives raw byte access to it */
+	union {
+		struct keyboard_protocol	keyboard;
+		struct touchpad_protocol	touchpad;
+		struct touchpad_info_protocol	tp_info;
+		struct command_protocol_tp_info	tp_info_command;
+		struct command_protocol_mt_init	init_mt_command;
+		struct command_protocol_capsl	capsl_command;
+		struct command_protocol_bl	bl_command;
+		u8				data[0];
+	};
+};
+
+/* type + zero + counter + rsp_buf_len + length */
+#define MSG_HEADER_SIZE		8
+
+/**
+ * struct spi_packet - a complete spi packet; always 256 bytes. This carries
+ * the (parts of the) message in the data. But note that this does not
+ * necessarily contain a complete message, as in some cases (e.g. many
+ * fingers pressed) the message is split over multiple packets (see the
+ * @offset, @remaining, and @length fields). In general the data parts in
+ * spi_packet's are concatenated until @remaining is 0, and the result is a
+ * message.
+ *
+ * @flags:	0x40 = write (to device), 0x20 = read (from device); note that
+ *		the response to a write still has 0x40.
+ * @device:	1 = keyboard, 2 = touchpad
+ * @offset:	specifies the offset of this packet's data in the complete
+ *		message; i.e. > 0 indicates this is a continuation packet (in
+ *		the second packet for a message split over multiple packets
+ *		this would then be the same as the @length in the first packet)
+ * @remaining:	number of message bytes remaining in subsequent packets (in
+ *		the first packet of a message split over two packets this would
+ *		then be the same as the @length in the second packet)
+ * @length:	length of the valid data in the @data in this packet
+ * @data:	all or part of a message
+ * @crc16:	crc over this whole structure minus this @crc16 field. This
+ *		covers just this packet, even on multi-packet messages (in
+ *		contrast to the crc in the message).
+ */
+struct spi_packet {
+	u8			flags;
+	u8			device;
+	__le16			offset;
+	__le16			remaining;
+	__le16			length;
+	u8			data[246];
+	__le16			crc16;
+};
+
+/*
+ * SPI timing settings. They are filled in by applespi_get_spi_settings() from
+ * the ACPI device properties "spiCSDelay", "resetA2RUsec" and "resetRecUsec".
+ */
+struct spi_settings {
+	u64	spi_cs_delay;		/* cs-to-clk delay in us */
+	u64	reset_a2r_usec;		/* active-to-receive delay? */
+	u64	reset_rec_usec;		/* ? (cur val: 10) */
+};
+
+/*
+ * The coordinate range of a touchpad. report_tp_state() uses y_min + y_max to
+ * mirror the reported y coordinate.
+ *
+ * this mimics struct drm_rect
+ */
+struct applespi_tp_info {
+	int	x_min;
+	int	y_min;
+	int	x_max;
+	int	y_max;
+};
+
+/* Per-device driver state. */
+struct applespi_data {
+	struct spi_device		*spi;
+	struct spi_settings		spi_settings;
+	struct input_dev		*keyboard_input_dev;
+	/* set asynchronously; readers use smp_load_acquire() */
+	struct input_dev		*touchpad_input_dev;
+
+	/* packet to write, status read back after the write, packet read */
+	u8				*tx_buffer;
+	u8				*tx_status;
+	u8				*rx_buffer;
+
+	/* NOTE(review): presumably used to re-assemble split messages - confirm */
+	u8				*msg_buf;
+	unsigned int			saved_msg_len;
+
+	struct applespi_tp_info		tp_info;
+
+	u8				last_keys_pressed[MAX_ROLLOVER];
+	u8				last_keys_fn_pressed[MAX_ROLLOVER];
+	u8				last_fn_pressed;
+	/* scratch arrays handed to input_mt_assign_slots() */
+	struct input_mt_pos		pos[MAX_FINGERS];
+	int				slots[MAX_FINGERS];
+	int				gpe;
+	/* ACPI handles of the SIEN and SIST methods, see applespi_enable_spi() */
+	acpi_handle			sien;
+	acpi_handle			sist;
+
+	/* read message: delay transfer + packet read */
+	struct spi_transfer		dl_t;
+	struct spi_transfer		rd_t;
+	struct spi_message		rd_m;
+
+	/* write message: wait, cs delay, packet write, status read */
+	struct spi_transfer		ww_t;
+	struct spi_transfer		wd_t;
+	struct spi_transfer		wr_t;
+	struct spi_transfer		st_t;
+	struct spi_message		wr_m;
+
+	/*
+	 * Pending-command state consumed by applespi_send_cmd_msg(): want_*
+	 * is the requested state, have_* the state of the last command built.
+	 */
+	bool				want_tp_info_cmd;
+	bool				want_mt_init_cmd;
+	bool				want_cl_led_on;
+	bool				have_cl_led_on;
+	unsigned int			want_bl_level;
+	unsigned int			have_bl_level;
+	/* source of message.counter for outgoing commands */
+	unsigned int			cmd_msg_cntr;
+	/* lock to protect the above parameters and flags below */
+	spinlock_t			cmd_msg_lock;
+	/* a command write has been queued and is not yet complete */
+	bool				cmd_msg_queued;
+	/* event type of the command being sent, used for tracing */
+	enum applespi_evt_type		cmd_evt_type;
+
+	struct led_classdev		backlight_info;
+
+	bool				suspended;
+	/* while set no new commands are sent; drain_complete is woken once no write is active */
+	bool				drain;
+	wait_queue_head_t		drain_complete;
+	bool				read_active;
+	bool				write_active;
+
+	struct work_struct		work;
+	struct touchpad_info_protocol	rcvd_tp_info;
+
+	/* debugfs: observed coordinate extremes, formatted into tp_dim_val on open */
+	struct dentry			*debugfs_root;
+	bool				debug_tp_dim;
+	char				tp_dim_val[40];
+	int				tp_dim_min_x;
+	int				tp_dim_max_x;
+	int				tp_dim_min_y;
+	int				tp_dim_max_y;
+};
+
+/*
+ * Maps a key code as passed to applespi_code_to_key() to a Linux input
+ * keycode; the key code is the index into this table. Entries of 0
+ * presumably mark codes that have no key assigned.
+ */
+static const unsigned char applespi_scancodes[] = {
+	0, 0, 0, 0,
+	KEY_A, KEY_B, KEY_C, KEY_D, KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J,
+	KEY_K, KEY_L, KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T,
+	KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z,
+	KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0,
+	KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS,
+	KEY_EQUAL, KEY_LEFTBRACE, KEY_RIGHTBRACE, KEY_BACKSLASH, 0,
+	KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA, KEY_DOT, KEY_SLASH,
+	KEY_CAPSLOCK,
+	KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, KEY_F8, KEY_F9,
+	KEY_F10, KEY_F11, KEY_F12, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, KEY_102ND,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, KEY_RO, 0, KEY_YEN, 0, 0, 0, 0, 0,
+	0, KEY_KATAKANAHIRAGANA, KEY_MUHENKAN
+};
+
+/*
+ * Keycodes of the modifier keys, indexed by bit number in
+ * struct keyboard_protocol.modifiers . It is also indexed with (fnremap - 1)
+ * by applespi_remap_fn_key().
+ *
+ * This must have exactly as many entries as there are bits in
+ * struct keyboard_protocol.modifiers .
+ */
+static const unsigned char applespi_controlcodes[] = {
+	KEY_LEFTCTRL,
+	KEY_LEFTSHIFT,
+	KEY_LEFTALT,
+	KEY_LEFTMETA,
+	0,
+	KEY_RIGHTSHIFT,
+	KEY_RIGHTALT,
+	KEY_RIGHTMETA
+};
+
+/*
+ * One entry of a key translation table: keycode @from is replaced by keycode
+ * @to. A @from of 0 terminates a table (see applespi_find_translation()).
+ * @flags may contain APPLE_FLAG_FKEY.
+ */
+struct applespi_key_translation {
+	u16 from;
+	u16 to;
+	u8 flags;
+};
+
+/*
+ * Translations used by applespi_translate_fn_key(). Entries flagged with
+ * APPLE_FLAG_FKEY are translated depending on the fnmode parameter and the
+ * state of the fn key; all other entries are translated only while fn is
+ * pressed. The empty last entry terminates the table.
+ */
+static const struct applespi_key_translation applespi_fn_codes[] = {
+	{ KEY_BACKSPACE, KEY_DELETE },
+	{ KEY_ENTER,	KEY_INSERT },
+	{ KEY_F1,	KEY_BRIGHTNESSDOWN,	APPLE_FLAG_FKEY },
+	{ KEY_F2,	KEY_BRIGHTNESSUP,	APPLE_FLAG_FKEY },
+	{ KEY_F3,	KEY_SCALE,		APPLE_FLAG_FKEY },
+	{ KEY_F4,	KEY_DASHBOARD,		APPLE_FLAG_FKEY },
+	{ KEY_F5,	KEY_KBDILLUMDOWN,	APPLE_FLAG_FKEY },
+	{ KEY_F6,	KEY_KBDILLUMUP,		APPLE_FLAG_FKEY },
+	{ KEY_F7,	KEY_PREVIOUSSONG,	APPLE_FLAG_FKEY },
+	{ KEY_F8,	KEY_PLAYPAUSE,		APPLE_FLAG_FKEY },
+	{ KEY_F9,	KEY_NEXTSONG,		APPLE_FLAG_FKEY },
+	{ KEY_F10,	KEY_MUTE,		APPLE_FLAG_FKEY },
+	{ KEY_F11,	KEY_VOLUMEDOWN,		APPLE_FLAG_FKEY },
+	{ KEY_F12,	KEY_VOLUMEUP,		APPLE_FLAG_FKEY },
+	{ KEY_RIGHT,	KEY_END },
+	{ KEY_LEFT,	KEY_HOME },
+	{ KEY_DOWN,	KEY_PAGEDOWN },
+	{ KEY_UP,	KEY_PAGEUP },
+	{ }
+};
+
+/*
+ * Swaps KEY_GRAVE and KEY_102ND; applied by applespi_translate_iso_layout()
+ * when the iso_layout parameter is set. The empty last entry terminates the
+ * table.
+ */
+static const struct applespi_key_translation apple_iso_keyboard[] = {
+	{ KEY_GRAVE,	KEY_102ND },
+	{ KEY_102ND,	KEY_GRAVE },
+	{ }
+};
+
+/* Associates a touchpad model number with the coordinate range of that model. */
+struct applespi_tp_model_info {
+	u16			model;
+	struct applespi_tp_info	tp_info;
+};
+
+/*
+ * Known touchpad models. The .tp_info values are given in the member order of
+ * struct applespi_tp_info, i.e. { x_min, y_min, x_max, y_max }. The last
+ * entry is empty.
+ */
+static const struct applespi_tp_model_info applespi_tp_models[] = {
+	{
+		.model = 0x04,	/* MB8 MB9 MB10 */
+		.tp_info = { -5087, -182, 5579, 6089 },
+	},
+	{
+		.model = 0x05,	/* MBP13,1 MBP13,2 MBP14,1 MBP14,2 */
+		.tp_info = { -6243, -170, 6749, 7685 },
+	},
+	{
+		.model = 0x06,	/* MBP13,3 MBP14,3 */
+		.tp_info = { -7456, -163, 7976, 9283 },
+	},
+	{}
+};
+
+typedef void (*applespi_trace_fun)(enum applespi_evt_type,
+				   enum applespi_pkt_type, u8 *, size_t);
+
+/*
+ * Select the trace function for the given event type. Unknown types trigger
+ * a one-time warning and are traced as unknown data.
+ */
+static applespi_trace_fun applespi_get_trace_fun(enum applespi_evt_type type)
+{
+	applespi_trace_fun fun = trace_applespi_unknown_data;
+
+	switch (type) {
+	case ET_CMD_TP_INI:
+		fun = trace_applespi_tp_ini_cmd;
+		break;
+	case ET_CMD_BL:
+		fun = trace_applespi_backlight_cmd;
+		break;
+	case ET_CMD_CL:
+		fun = trace_applespi_caps_lock_cmd;
+		break;
+	case ET_RD_KEYB:
+		fun = trace_applespi_keyboard_data;
+		break;
+	case ET_RD_TPAD:
+		fun = trace_applespi_touchpad_data;
+		break;
+	case ET_RD_UNKN:
+		/* already the default */
+		break;
+	default:
+		WARN_ONCE(1, "Unknown msg type %d", type);
+		break;
+	}
+
+	return fun;
+}
+
+/*
+ * Prepare the read message (rd_m): a data-less transfer that only carries the
+ * cs delay, followed by the read of one full packet into rx_buffer.
+ */
+static void applespi_setup_read_txfrs(struct applespi_data *applespi)
+{
+	struct spi_transfer *delay_xfer = &applespi->dl_t;
+	struct spi_transfer *read_xfer = &applespi->rd_t;
+
+	memset(delay_xfer, 0, sizeof(*delay_xfer));
+	delay_xfer->delay_usecs = applespi->spi_settings.spi_cs_delay;
+
+	memset(read_xfer, 0, sizeof(*read_xfer));
+	read_xfer->len = APPLESPI_PACKET_SIZE;
+	read_xfer->rx_buf = applespi->rx_buffer;
+
+	spi_message_init(&applespi->rd_m);
+	spi_message_add_tail(delay_xfer, &applespi->rd_m);
+	spi_message_add_tail(read_xfer, &applespi->rd_m);
+}
+
+/*
+ * Prepare the write message (wr_m). It consists of four transfers: an initial
+ * wait, the cs delay, the write of one full packet from tx_buffer, and the
+ * read of the status into tx_status.
+ */
+static void applespi_setup_write_txfrs(struct applespi_data *applespi)
+{
+	struct spi_transfer *wait_xfer = &applespi->ww_t;
+	struct spi_transfer *delay_xfer = &applespi->wd_t;
+	struct spi_transfer *write_xfer = &applespi->wr_t;
+	struct spi_transfer *status_xfer = &applespi->st_t;
+
+	/*
+	 * What is really wanted is a delay before cs gets asserted at the
+	 * start of the message. The spi API currently has no way to express
+	 * that, so a separate transfer is used, at the cost of one extra
+	 * (unnecessary but harmless) cs assertion and deassertion.
+	 */
+	memset(wait_xfer, 0, sizeof(*wait_xfer));
+	wait_xfer->cs_change = 1;
+	wait_xfer->delay_usecs = SPI_RW_CHG_DELAY_US;
+
+	memset(delay_xfer, 0, sizeof(*delay_xfer));
+	delay_xfer->delay_usecs = applespi->spi_settings.spi_cs_delay;
+
+	memset(write_xfer, 0, sizeof(*write_xfer));
+	write_xfer->len = APPLESPI_PACKET_SIZE;
+	write_xfer->tx_buf = applespi->tx_buffer;
+	write_xfer->delay_usecs = SPI_RW_CHG_DELAY_US;
+
+	memset(status_xfer, 0, sizeof(*status_xfer));
+	status_xfer->len = APPLESPI_STATUS_SIZE;
+	status_xfer->rx_buf = applespi->tx_status;
+
+	spi_message_init(&applespi->wr_m);
+	spi_message_add_tail(wait_xfer, &applespi->wr_m);
+	spi_message_add_tail(delay_xfer, &applespi->wr_m);
+	spi_message_add_tail(write_xfer, &applespi->wr_m);
+	spi_message_add_tail(status_xfer, &applespi->wr_m);
+}
+
+/*
+ * Queue @message with spi_async(); @complete is installed as its completion
+ * callback and receives @applespi as context. Returns what spi_async()
+ * returns.
+ */
+static int applespi_async(struct applespi_data *applespi,
+			  struct spi_message *message, void (*complete)(void *))
+{
+	struct spi_device *spi = applespi->spi;
+
+	message->context = applespi;
+	message->complete = complete;
+
+	return spi_async(spi, message);
+}
+
+/*
+ * Check the outcome of a write.
+ *
+ * @sts is the status of the spi message; a negative value is an error. If the
+ * transfer itself succeeded, the status bytes read back from the device
+ * (tx_status) must match the expected pattern.
+ *
+ * Returns true if the write succeeded; otherwise logs a warning and returns
+ * false.
+ */
+static inline bool applespi_check_write_status(struct applespi_data *applespi,
+					       int sts)
+{
+	/* the table is only ever compared against, so keep it read-only */
+	static const u8 status_ok[] = { 0xac, 0x27, 0x68, 0xd5 };
+
+	if (sts < 0) {
+		dev_warn(&applespi->spi->dev, "Error writing to device: %d\n",
+			 sts);
+		return false;
+	}
+
+	if (memcmp(applespi->tx_status, status_ok, APPLESPI_STATUS_SIZE)) {
+		dev_warn(&applespi->spi->dev, "Error writing to device: %*ph\n",
+			 APPLESPI_STATUS_SIZE, applespi->tx_status);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Read the 64-bit value of the ACPI buffer property @name into @val.
+ *
+ * If the property does not exist, or its buffer is too short to hold a u64,
+ * a warning is logged and @val is left untouched.
+ */
+static void applespi_get_u64_setting(struct applespi_data *applespi,
+				     const char *name, u64 *val)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&applespi->spi->dev);
+	const union acpi_object *o;
+
+	if (acpi_dev_get_property(adev, name, ACPI_TYPE_BUFFER, &o)) {
+		dev_warn(&applespi->spi->dev, "Property %s not found\n", name);
+		return;
+	}
+
+	/* never read past the end of the buffer handed to us by ACPI */
+	if (o->buffer.length < sizeof(*val)) {
+		dev_warn(&applespi->spi->dev,
+			 "Property %s too short (%u bytes)\n", name,
+			 o->buffer.length);
+		return;
+	}
+
+	/* the buffer carries no alignment guarantee, so copy the bytes out */
+	memcpy(val, o->buffer.pointer, sizeof(*val));
+}
+
+/*
+ * Fetch the SPI timing settings from the ACPI properties of the device.
+ *
+ * A missing property only produces a warning; the corresponding setting then
+ * keeps its previous value. Always returns 0.
+ */
+static int applespi_get_spi_settings(struct applespi_data *applespi)
+{
+	struct spi_settings *settings = &applespi->spi_settings;
+
+	applespi_get_u64_setting(applespi, "spiCSDelay",
+				 &settings->spi_cs_delay);
+	applespi_get_u64_setting(applespi, "resetA2RUsec",
+				 &settings->reset_a2r_usec);
+	applespi_get_u64_setting(applespi, "resetRecUsec",
+				 &settings->reset_rec_usec);
+
+	dev_dbg(&applespi->spi->dev,
+		"SPI settings: spi_cs_delay=%llu reset_a2r_usec=%llu reset_rec_usec=%llu\n",
+		settings->spi_cs_delay, settings->reset_a2r_usec,
+		settings->reset_rec_usec);
+
+	return 0;
+}
+
+/*
+ * Read the spi settings and, if that succeeded, initialize the command lock
+ * and the drain wait queue. Returns 0 on success, else the error from
+ * applespi_get_spi_settings().
+ */
+static int applespi_setup_spi(struct applespi_data *applespi)
+{
+	int sts = applespi_get_spi_settings(applespi);
+
+	if (!sts) {
+		spin_lock_init(&applespi->cmd_msg_lock);
+		init_waitqueue_head(&applespi->drain_complete);
+	}
+
+	return sts;
+}
+
+/*
+ * Make sure the SPI interface of the device is enabled.
+ *
+ * Returns 0 if SPI was already enabled or has been enabled successfully, and
+ * -ENODEV if the SIEN method failed.
+ */
+static int applespi_enable_spi(struct applespi_data *applespi)
+{
+	unsigned long long enabled;
+	acpi_status status;
+
+	/* nothing to do (and no need to wait) if SIST reports SPI as enabled */
+	status = acpi_evaluate_integer(applespi->sist, NULL, NULL, &enabled);
+	if (ACPI_SUCCESS(status) && enabled)
+		return 0;
+
+	/* invoking SIEN with argument 1 turns on SPI communication */
+	status = acpi_execute_simple_method(applespi->sien, NULL, 1);
+	if (ACPI_SUCCESS(status)) {
+		/*
+		 * Give the SPI interface time to come up. If we return too
+		 * early the commands that enable multitouch mode may not
+		 * reach the trackpad controller, which breaks pointer
+		 * movement after resume from sleep.
+		 */
+		msleep(50);
+		return 0;
+	}
+
+	dev_err(&applespi->spi->dev, "SIEN failed: %s\n",
+		acpi_format_exception(status));
+	return -ENODEV;
+}
+
+static int applespi_send_cmd_msg(struct applespi_data *applespi);
+
+/*
+ * Bookkeeping at the end of an spi exchange; runs with cmd_msg_lock held.
+ *
+ * @is_write_msg:  a write exchange has ended; clears write_active and
+ *                 cmd_msg_queued and attempts to send the next pending
+ *                 command.
+ * @is_read_compl: a read has completed; clears read_active.
+ *
+ * If a drain is in progress and no write is active any more, the waiters on
+ * drain_complete are woken up.
+ */
+static void applespi_msg_complete(struct applespi_data *applespi,
+				  bool is_write_msg, bool is_read_compl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+	if (is_read_compl)
+		applespi->read_active = false;
+	if (is_write_msg)
+		applespi->write_active = false;
+
+	if (applespi->drain && !applespi->write_active)
+		wake_up_all(&applespi->drain_complete);
+
+	if (is_write_msg) {
+		/* the previous command is done, so the next one may go out */
+		applespi->cmd_msg_queued = false;
+		applespi_send_cmd_msg(applespi);
+	}
+
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+/*
+ * Completion callback of the write message: traces the written packet and
+ * the status that was read back, then checks the result of the write.
+ */
+static void applespi_async_write_complete(void *context)
+{
+	struct applespi_data *applespi = context;
+	enum applespi_evt_type evt_type = applespi->cmd_evt_type;
+	applespi_trace_fun trace = applespi_get_trace_fun(evt_type);
+
+	trace(evt_type, PT_WRITE, applespi->tx_buffer, APPLESPI_PACKET_SIZE);
+	trace(evt_type, PT_STATUS, applespi->tx_status, APPLESPI_STATUS_SIZE);
+
+	if (applespi_check_write_status(applespi, applespi->wr_m.status))
+		return;
+
+	/*
+	 * The write failed, so the response message will presumably never
+	 * arrive; finish the write exchange right here.
+	 */
+	applespi_msg_complete(applespi, true, false);
+}
+
+/*
+ * Build the next pending command in tx_buffer and queue it for writing.
+ *
+ * Nothing is sent while draining or while a previous command is still queued.
+ * Otherwise the first pending item is picked, in this order: touchpad-info
+ * command, multitouch-init command, caps-lock led change, backlight level
+ * change. All callers visible in this file invoke this with cmd_msg_lock
+ * held.
+ *
+ * Returns 0 if the command was queued or there was nothing to do, else the
+ * error returned by spi_async().
+ *
+ * NOTE(review): the want/have state is updated before the write is queued,
+ * so a command whose spi_async() fails is not retried by a later call.
+ */
+static int applespi_send_cmd_msg(struct applespi_data *applespi)
+{
+	u16 crc;
+	int sts;
+	struct spi_packet *packet = (struct spi_packet *)applespi->tx_buffer;
+	struct message *message = (struct message *)packet->data;
+	u16 msg_len;
+	u8 device;
+
+	/* check if draining */
+	if (applespi->drain)
+		return 0;
+
+	/* check whether send is in progress */
+	if (applespi->cmd_msg_queued)
+		return 0;
+
+	/* set up packet */
+	memset(packet, 0, APPLESPI_PACKET_SIZE);
+
+	/* are we processing init commands? */
+	if (applespi->want_tp_info_cmd) {
+		/* the touchpad-info command is always followed by mt-init */
+		applespi->want_tp_info_cmd = false;
+		applespi->want_mt_init_cmd = true;
+		applespi->cmd_evt_type = ET_CMD_TP_INI;
+
+		/* build init command */
+		device = PACKET_DEV_INFO;
+
+		message->type = cpu_to_le16(0x1020);
+		msg_len = sizeof(message->tp_info_command);
+
+		message->zero = 0x02;
+		message->rsp_buf_len = cpu_to_le16(0x0200);
+
+	} else if (applespi->want_mt_init_cmd) {
+		applespi->want_mt_init_cmd = false;
+		applespi->cmd_evt_type = ET_CMD_TP_INI;
+
+		/* build init command */
+		device = PACKET_DEV_TPAD;
+
+		message->type = cpu_to_le16(0x0252);
+		msg_len = sizeof(message->init_mt_command);
+
+		message->init_mt_command.cmd = cpu_to_le16(0x0102);
+
+	/* do we need caps-lock command? */
+	} else if (applespi->want_cl_led_on != applespi->have_cl_led_on) {
+		applespi->have_cl_led_on = applespi->want_cl_led_on;
+		applespi->cmd_evt_type = ET_CMD_CL;
+
+		/* build led command */
+		device = PACKET_DEV_KEYB;
+
+		message->type = cpu_to_le16(0x0151);
+		msg_len = sizeof(message->capsl_command);
+
+		message->capsl_command.unknown = 0x01;
+		message->capsl_command.led = applespi->have_cl_led_on ? 2 : 0;
+
+	/* do we need backlight command? */
+	} else if (applespi->want_bl_level != applespi->have_bl_level) {
+		applespi->have_bl_level = applespi->want_bl_level;
+		applespi->cmd_evt_type = ET_CMD_BL;
+
+		/* build command buffer */
+		device = PACKET_DEV_KEYB;
+
+		message->type = cpu_to_le16(0xB051);
+		msg_len = sizeof(message->bl_command);
+
+		message->bl_command.const1 = cpu_to_le16(0x01B0);
+		message->bl_command.level =
+				cpu_to_le16(applespi->have_bl_level);
+
+		if (applespi->have_bl_level > 0)
+			message->bl_command.const2 = cpu_to_le16(0x01F4);
+		else
+			message->bl_command.const2 = cpu_to_le16(0x0001);
+
+	/* everything's up-to-date */
+	} else {
+		return 0;
+	}
+
+	/* finalize packet */
+	packet->flags = PACKET_TYPE_WRITE;
+	packet->device = device;
+	packet->length = cpu_to_le16(MSG_HEADER_SIZE + msg_len);
+
+	message->counter = applespi->cmd_msg_cntr++ % (U8_MAX + 1);
+
+	/* msg_len includes the trailing crc, message->length does not */
+	message->length = cpu_to_le16(msg_len - 2);
+	if (!message->rsp_buf_len)
+		message->rsp_buf_len = message->length;
+
+	/* message crc: header + payload, stored in the payload's last 2 bytes */
+	crc = crc16(0, (u8 *)message, le16_to_cpu(packet->length) - 2);
+	put_unaligned_le16(crc, &message->data[msg_len - 2]);
+
+	/* packet crc: the whole packet except its own crc16 field */
+	crc = crc16(0, (u8 *)packet, sizeof(*packet) - 2);
+	packet->crc16 = cpu_to_le16(crc);
+
+	/* send command */
+	sts = applespi_async(applespi, &applespi->wr_m,
+			     applespi_async_write_complete);
+	if (sts) {
+		dev_warn(&applespi->spi->dev,
+			 "Error queueing async write to device: %d\n", sts);
+		return sts;
+	}
+
+	applespi->cmd_msg_queued = true;
+	applespi->write_active = true;
+
+	return 0;
+}
+
+/*
+ * Kick off the initialization commands. On resume only the multitouch-init
+ * command is requested, otherwise the touchpad-info command is.
+ */
+static void applespi_init(struct applespi_data *applespi, bool is_resume)
+{
+	bool *want_cmd = is_resume ? &applespi->want_mt_init_cmd
+				   : &applespi->want_tp_info_cmd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+	*want_cmd = true;
+	applespi_send_cmd_msg(applespi);
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+/*
+ * Record the wanted state of the caps-lock led and try to send the pending
+ * command. Returns the result of applespi_send_cmd_msg().
+ */
+static int applespi_set_capsl_led(struct applespi_data *applespi,
+				  bool capslock_on)
+{
+	unsigned long irq_flags;
+	int ret;
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, irq_flags);
+	applespi->want_cl_led_on = capslock_on;
+	ret = applespi_send_cmd_msg(applespi);
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, irq_flags);
+
+	return ret;
+}
+
+/*
+ * Brightness callback of the keyboard backlight led: record the wanted
+ * backlight level and try to send the pending command.
+ */
+static void applespi_set_bl_level(struct led_classdev *led_cdev,
+				  enum led_brightness value)
+{
+	struct applespi_data *applespi =
+		container_of(led_cdev, struct applespi_data, backlight_info);
+	unsigned int level = 0;
+	unsigned long flags;
+
+	/*
+	 * The backlight stays dark below level 32, so every non-zero
+	 * brightness is scaled into the usable range; from the user's
+	 * perspective the backlight then turns on at 1.
+	 */
+	if (value != 0)
+		level = ((value * KBD_BL_LEVEL_ADJ) / KBD_BL_LEVEL_SCALE +
+			 KBD_BL_LEVEL_MIN);
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+	applespi->want_bl_level = level;
+	applespi_send_cmd_msg(applespi);
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+/*
+ * Input event handler: for EV_LED events the caps-lock led is set according
+ * to the LED_CAPSL bit of the device and 0 is returned; all other event types
+ * yield -EINVAL.
+ */
+static int applespi_event(struct input_dev *dev, unsigned int type,
+			  unsigned int code, int value)
+{
+	struct applespi_data *applespi = input_get_drvdata(dev);
+
+	if (type != EV_LED)
+		return -EINVAL;
+
+	applespi_set_capsl_led(applespi, !!test_bit(LED_CAPSL, dev->led));
+
+	return 0;
+}
+
+/*
+ * Convert a 16-bit little endian value to a signed integer.
+ * (lifted from the BCM5974 driver and renamed from raw2int)
+ */
+static inline int le16_to_int(__le16 x)
+{
+	signed short val = le16_to_cpu(x);
+
+	return val;
+}
+
+/*
+ * Widen the recorded coordinate extremes (tp_dim_*) so that they include the
+ * position of finger @f.
+ *
+ * The coordinates are signed little-endian 16-bit values on the wire and
+ * must be converted with le16_to_int() first; otherwise negative positions
+ * would be treated as large positive ones.
+ */
+static void applespi_debug_update_dimensions(struct applespi_data *applespi,
+					     const struct tp_finger *f)
+{
+	int x = le16_to_int(f->abs_x);
+	int y = le16_to_int(f->abs_y);
+
+	applespi->tp_dim_min_x = min(applespi->tp_dim_min_x, x);
+	applespi->tp_dim_max_x = max(applespi->tp_dim_max_x, x);
+	applespi->tp_dim_min_y = min(applespi->tp_dim_min_y, y);
+	applespi->tp_dim_max_y = max(applespi->tp_dim_max_y, y);
+}
+
+/*
+ * open() of the tp_dim debugfs file: takes a snapshot of the recorded
+ * touchpad dimensions, formatted as text, in tp_dim_val; reads then return
+ * that snapshot.
+ */
+static int applespi_tp_dim_open(struct inode *inode, struct file *file)
+{
+	struct applespi_data *applespi = inode->i_private;
+	int width = applespi->tp_dim_max_x - applespi->tp_dim_min_x;
+	int height = applespi->tp_dim_max_y - applespi->tp_dim_min_y;
+
+	file->private_data = applespi;
+
+	snprintf(applespi->tp_dim_val, sizeof(applespi->tp_dim_val),
+		 "0x%.4x %dx%d+%u+%u\n",
+		 applespi->touchpad_input_dev->id.product,
+		 applespi->tp_dim_min_x, applespi->tp_dim_min_y,
+		 width, height);
+
+	return nonseekable_open(inode, file);
+}
+
+/* read() of the tp_dim debugfs file: returns the text prepared at open time */
+static ssize_t applespi_tp_dim_read(struct file *file, char __user *buf,
+				    size_t len, loff_t *off)
+{
+	struct applespi_data *applespi = file->private_data;
+	const char *text = applespi->tp_dim_val;
+
+	return simple_read_from_buffer(buf, len, off, text, strlen(text));
+}
+
+/*
+ * File operations of the tp_dim debugfs file. The file is read-only and not
+ * seekable (it is opened with nonseekable_open()).
+ */
+static const struct file_operations applespi_tp_dim_fops = {
+	.owner = THIS_MODULE,
+	.open = applespi_tp_dim_open,
+	.read = applespi_tp_dim_read,
+	.llseek = no_llseek,
+};
+
+/*
+ * Report one finger in multitouch slot @slot: the contact and tool sizes and
+ * the orientation are taken from @f, the position from @pos.
+ */
+static void report_finger_data(struct input_dev *input, int slot,
+			       const struct input_mt_pos *pos,
+			       const struct tp_finger *f)
+{
+	int touch_major = le16_to_int(f->touch_major) << 1;
+	int touch_minor = le16_to_int(f->touch_minor) << 1;
+	int width_major = le16_to_int(f->tool_major) << 1;
+	int width_minor = le16_to_int(f->tool_minor) << 1;
+	int orientation = MAX_FINGER_ORIENTATION - le16_to_int(f->orientation);
+
+	input_mt_slot(input, slot);
+	input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+
+	input_report_abs(input, ABS_MT_TOUCH_MAJOR, touch_major);
+	input_report_abs(input, ABS_MT_TOUCH_MINOR, touch_minor);
+	input_report_abs(input, ABS_MT_WIDTH_MAJOR, width_major);
+	input_report_abs(input, ABS_MT_WIDTH_MINOR, width_minor);
+	input_report_abs(input, ABS_MT_ORIENTATION, orientation);
+	input_report_abs(input, ABS_MT_POSITION_X, pos->x);
+	input_report_abs(input, ABS_MT_POSITION_Y, pos->y);
+}
+
+/*
+ * Report the touchpad state of message @t to the input layer.
+ *
+ * Fingers with a touch_major of 0 are ignored. For each remaining finger the
+ * position is recorded (with the y axis mirrored within the touchpad's y
+ * range), slots are assigned, and the per-finger data plus the button state
+ * are reported. Nothing is reported as long as the touchpad input device has
+ * not been registered.
+ */
+static void report_tp_state(struct applespi_data *applespi,
+			    struct touchpad_protocol *t)
+{
+	/* the fingers that made it into applespi->pos[], in the same order */
+	const struct tp_finger *touching[MAX_FINGERS];
+	const struct tp_finger *f;
+	struct input_dev *input;
+	const struct applespi_tp_info *tp_info = &applespi->tp_info;
+	int i, n;
+
+	/* touchpad_input_dev is set async in worker */
+	input = smp_load_acquire(&applespi->touchpad_input_dev);
+	if (!input)
+		return;	/* touchpad isn't initialized yet */
+
+	n = 0;
+
+	/*
+	 * number_of_fingers comes from the device, so never collect more
+	 * fingers than pos[] and slots[] have room for.
+	 */
+	for (i = 0; i < t->number_of_fingers && n < MAX_FINGERS; i++) {
+		f = &t->fingers[i];
+		if (le16_to_int(f->touch_major) == 0)
+			continue;
+		applespi->pos[n].x = le16_to_int(f->abs_x);
+		applespi->pos[n].y = tp_info->y_min + tp_info->y_max -
+				     le16_to_int(f->abs_y);
+		touching[n] = f;
+		n++;
+
+		if (applespi->debug_tp_dim)
+			applespi_debug_update_dimensions(applespi, f);
+	}
+
+	input_mt_assign_slots(input, applespi->slots, applespi->pos, n, 0);
+
+	/*
+	 * Use the finger each position was taken from: once a finger has been
+	 * skipped above, pos[i] no longer belongs to t->fingers[i].
+	 */
+	for (i = 0; i < n; i++)
+		report_finger_data(input, applespi->slots[i],
+				   &applespi->pos[i], touching[i]);
+
+	input_mt_sync_frame(input);
+	input_report_key(input, BTN_LEFT, t->clicked);
+
+	input_sync(input);
+}
+
+/*
+ * Find @key in @table (terminated by a zero 'from'); NULL if it is absent.
+ */
+static const struct applespi_key_translation *
+applespi_find_translation(const struct applespi_key_translation *table, u16 key)
+{
+	while (table->from && table->from != key)
+		table++;
+
+	return table->from ? table : NULL;
+}
+
+/*
+ * Translate @key via applespi_fn_codes: F-keys obey fnmode, others need fn.
+ */
+static unsigned int applespi_translate_fn_key(unsigned int key, int fn_pressed)
+{
+	const struct applespi_key_translation *entry =
+		applespi_find_translation(applespi_fn_codes, key);
+
+	if (!entry)
+		return key;
+
+	if (!(entry->flags & APPLE_FLAG_FKEY))
+		return fn_pressed ? entry->to : key;
+
+	if ((fnmode == 2 && fn_pressed) || (fnmode == 1 && !fn_pressed))
+		return entry->to;
+
+	return key;
+}
+
+/*
+ * Map @key through the apple_iso_keyboard table; unlisted keys pass through.
+ */
+static unsigned int applespi_translate_iso_layout(unsigned int key)
+{
+	const struct applespi_key_translation *entry =
+		applespi_find_translation(apple_iso_keyboard, key);
+
+	return entry ? entry->to : key;
+}
+
+/* Scancode -> key code; codes outside applespi_scancodes[] yield 0 */
+static unsigned int applespi_code_to_key(u8 code, int fn_pressed)
+{
+	unsigned int key = code < ARRAY_SIZE(applespi_scancodes) ?
+			   applespi_scancodes[code] : 0;
+
+	if (fnmode)
+		key = applespi_translate_fn_key(key, fn_pressed);
+	return iso_layout ? applespi_translate_iso_layout(key) : key;
+}
+
+/*
+ * Exchange the fn state with the modifier bit picked by fnremap (1-based).
+ */
+static void
+applespi_remap_fn_key(struct keyboard_protocol *keyboard_protocol)
+{
+	struct keyboard_protocol *kp = keyboard_protocol;
+	u8 mask = BIT((fnremap - 1) & 0x07);
+	unsigned char old_fn = kp->fn_pressed;
+
+	if (!fnremap || fnremap > ARRAY_SIZE(applespi_controlcodes) ||
+	    !applespi_controlcodes[fnremap - 1])
+		return;
+
+	kp->fn_pressed = !!(kp->modifiers & mask);
+	kp->modifiers = old_fn ? kp->modifiers | mask : kp->modifiers & ~mask;
+}
+
+static void
+applespi_handle_keyboard_event(struct applespi_data *applespi,
+			       struct keyboard_protocol *keyboard_protocol)
+{
+	unsigned int key;
+	int i;
+
+	compiletime_assert(ARRAY_SIZE(applespi_controlcodes) ==
+			   sizeof_field(struct keyboard_protocol, modifiers) * 8,
+			   "applespi_controlcodes has wrong number of entries");
+
+	/* rollover overflow (all keys == 1): ignore the whole report */
+	if (!memchr_inv(keyboard_protocol->keys_pressed, 1, MAX_ROLLOVER))
+		return;
+
+	/* swap fn with a modifier if the fnremap parameter asks for it */
+	applespi_remap_fn_key(keyboard_protocol);
+
+	/* release keys no longer reported; NOTE(review): no range check here */
+	for (i = 0; i < MAX_ROLLOVER; i++) {
+		if (memchr(keyboard_protocol->keys_pressed,
+			   applespi->last_keys_pressed[i], MAX_ROLLOVER))
+			continue;	/* key is still pressed */
+
+		key = applespi_code_to_key(applespi->last_keys_pressed[i],
+					   applespi->last_keys_fn_pressed[i]);
+		input_report_key(applespi->keyboard_input_dev, key, 0);
+		applespi->last_keys_fn_pressed[i] = 0;
+	}
+
+	/* press every in-table, non-zero scancode of this report */
+	for (i = 0; i < MAX_ROLLOVER; i++) {
+		if (keyboard_protocol->keys_pressed[i] <
+				ARRAY_SIZE(applespi_scancodes) &&
+		    keyboard_protocol->keys_pressed[i] > 0) {
+			key = applespi_code_to_key(
+					keyboard_protocol->keys_pressed[i],
+					keyboard_protocol->fn_pressed);
+			input_report_key(applespi->keyboard_input_dev, key, 1);
+			applespi->last_keys_fn_pressed[i] =
+					keyboard_protocol->fn_pressed;
+		}
+	}
+
+	/* one modifier bit per control key: report each bit's state */
+	for (i = 0; i < ARRAY_SIZE(applespi_controlcodes); i++) {
+		if (keyboard_protocol->modifiers & BIT(i))
+			input_report_key(applespi->keyboard_input_dev,
+					 applespi_controlcodes[i], 1);
+		else
+			input_report_key(applespi->keyboard_input_dev,
+					 applespi_controlcodes[i], 0);
+	}
+
+	/* report KEY_FN only when the fn state changed */
+	if (keyboard_protocol->fn_pressed && !applespi->last_fn_pressed)
+		input_report_key(applespi->keyboard_input_dev, KEY_FN, 1);
+	else if (!keyboard_protocol->fn_pressed && applespi->last_fn_pressed)
+		input_report_key(applespi->keyboard_input_dev, KEY_FN, 0);
+	applespi->last_fn_pressed = keyboard_protocol->fn_pressed;
+
+	/* flush events and remember the keys for the next report */
+	input_sync(applespi->keyboard_input_dev);
+	memcpy(&applespi->last_keys_pressed, keyboard_protocol->keys_pressed,
+	       sizeof(applespi->last_keys_pressed));
+}
+
+/*
+ * Dimensions for touchpad @model, or NULL if applespi_tp_models lacks it.
+ */
+static const struct applespi_tp_info *applespi_find_touchpad_info(u8 model)
+{
+	const struct applespi_tp_model_info *m = applespi_tp_models;
+
+	while (m->model && m->model != model)
+		m++;
+	return m->model ? &m->tp_info : NULL;
+}
+
+static int
+applespi_register_touchpad_device(struct applespi_data *applespi,
+				  struct touchpad_info_protocol *rcvd_tp_info)
+{
+	const struct applespi_tp_info *tp_info;
+	struct input_dev *touchpad_input_dev;
+	int sts;
+
+	/* dimensions: model table first, unknown models use entry 0 */
+	tp_info = applespi_find_touchpad_info(rcvd_tp_info->model_no);
+	if (!tp_info) {
+		dev_warn(&applespi->spi->dev,
+			 "Unknown touchpad model %x - falling back to MB8 touchpad\n",
+			 rcvd_tp_info->model_no);
+		tp_info = &applespi_tp_models[0].tp_info;
+	}
+
+	applespi->tp_info = *tp_info;
+
+	if (touchpad_dimensions[0]) {	/* module param override */
+		int x, y, w, h;
+
+		sts = sscanf(touchpad_dimensions, "%dx%d+%u+%u", &x, &y, &w, &h);
+		if (sts == 4) {
+			dev_info(&applespi->spi->dev,
+				 "Overriding touchpad dimensions from module param\n");
+			applespi->tp_info.x_min = x;
+			applespi->tp_info.y_min = y;
+			applespi->tp_info.x_max = x + w;
+			applespi->tp_info.y_max = y + h;
+		} else {
+			dev_warn(&applespi->spi->dev,
+				 "Invalid touchpad dimensions '%s': must be in the form XxY+W+H\n",
+				 touchpad_dimensions);
+			touchpad_dimensions[0] = '\0';
+		}
+	}
+	if (!touchpad_dimensions[0]) {	/* publish the values in use */
+		snprintf(touchpad_dimensions, sizeof(touchpad_dimensions),
+			 "%dx%d+%u+%u",
+			 applespi->tp_info.x_min,
+			 applespi->tp_info.y_min,
+			 applespi->tp_info.x_max - applespi->tp_info.x_min,
+			 applespi->tp_info.y_max - applespi->tp_info.y_min);
+	}
+
+	/* create touchpad input device */
+	touchpad_input_dev = devm_input_allocate_device(&applespi->spi->dev);
+	if (!touchpad_input_dev) {
+		dev_err(&applespi->spi->dev,
+			"Failed to allocate touchpad input device\n");
+		return -ENOMEM;
+	}
+
+	touchpad_input_dev->name = "Apple SPI Touchpad";
+	touchpad_input_dev->phys = "applespi/input1";
+	touchpad_input_dev->dev.parent = &applespi->spi->dev;
+	touchpad_input_dev->id.bustype = BUS_SPI;
+	touchpad_input_dev->id.vendor = SYNAPTICS_VENDOR_ID;
+	touchpad_input_dev->id.product =
+			rcvd_tp_info->model_no << 8 | rcvd_tp_info->model_flags;
+
+	/* basic properties */
+	input_set_capability(touchpad_input_dev, EV_REL, REL_X);
+	input_set_capability(touchpad_input_dev, EV_REL, REL_Y);
+
+	__set_bit(INPUT_PROP_POINTER, touchpad_input_dev->propbit);
+	__set_bit(INPUT_PROP_BUTTONPAD, touchpad_input_dev->propbit);
+
+	/* finger touch area */
+	input_set_abs_params(touchpad_input_dev, ABS_MT_TOUCH_MAJOR,
+			     0, 5000, 0, 0);
+	input_set_abs_params(touchpad_input_dev, ABS_MT_TOUCH_MINOR,
+			     0, 5000, 0, 0);
+
+	/* finger approach area */
+	input_set_abs_params(touchpad_input_dev, ABS_MT_WIDTH_MAJOR,
+			     0, 5000, 0, 0);
+	input_set_abs_params(touchpad_input_dev, ABS_MT_WIDTH_MINOR,
+			     0, 5000, 0, 0);
+
+	/* finger orientation */
+	input_set_abs_params(touchpad_input_dev, ABS_MT_ORIENTATION,
+			     -MAX_FINGER_ORIENTATION, MAX_FINGER_ORIENTATION,
+			     0, 0);
+
+	/* finger position */
+	input_set_abs_params(touchpad_input_dev, ABS_MT_POSITION_X,
+			     applespi->tp_info.x_min, applespi->tp_info.x_max,
+			     0, 0);
+	input_set_abs_params(touchpad_input_dev, ABS_MT_POSITION_Y,
+			     applespi->tp_info.y_min, applespi->tp_info.y_max,
+			     0, 0);
+
+	/* touchpad button */
+	input_set_capability(touchpad_input_dev, EV_KEY, BTN_LEFT);
+
+	/* multitouch */
+	sts = input_mt_init_slots(touchpad_input_dev, MAX_FINGERS,
+				  INPUT_MT_POINTER | INPUT_MT_DROP_UNUSED |
+					INPUT_MT_TRACK);
+	if (sts) {
+		dev_err(&applespi->spi->dev,
+			"failed to initialize slots: %d\n", sts);
+		return sts;
+	}
+
+	/* register input device */
+	sts = input_register_device(touchpad_input_dev);
+	if (sts) {
+		dev_err(&applespi->spi->dev,
+			"Unable to register touchpad input device (%d)\n", sts);
+		return sts;
+	}
+
+	/* touchpad_input_dev is read async in spi callback */
+	smp_store_release(&applespi->touchpad_input_dev, touchpad_input_dev);
+
+	return 0;
+}
+
+/* Work item: register the touchpad device using the saved info message */
+static void applespi_worker(struct work_struct *work)
+{
+	struct applespi_data *d = container_of(work, typeof(*d), work);
+
+	applespi_register_touchpad_device(d, &d->rcvd_tp_info);
+}
+
+static void applespi_handle_cmd_response(struct applespi_data *applespi,
+					 struct spi_packet *packet,
+					 struct message *message)
+{
+	if (packet->device == PACKET_DEV_INFO &&
+	    le16_to_cpu(message->type) == 0x1020) {	/* touchpad info */
+		/*
+		 * We're not allowed to sleep here, but registering an input
+		 * device can sleep: keep a copy of the info, defer to worker.
+		 */
+		applespi->rcvd_tp_info = message->tp_info;
+		schedule_work(&applespi->work);
+		return;
+	}
+
+	if (le16_to_cpu(message->length) != 0x0000) {
+		dev_warn_ratelimited(&applespi->spi->dev,
+				     "Received unexpected write response: length=%x\n",
+				     le16_to_cpu(message->length));
+		return;
+	}
+
+	if (packet->device == PACKET_DEV_TPAD &&
+	    le16_to_cpu(message->type) == 0x0252 &&
+	    le16_to_cpu(message->rsp_buf_len) == 0x0002)
+		dev_info(&applespi->spi->dev, "modeswitch done.\n");
+}
+
+/* Return true when the crc16 computed over the whole buffer is zero */
+static bool applespi_verify_crc(struct applespi_data *applespi, u8 *buffer,
+				size_t buflen)
+{
+	u16 crc;
+
+	crc = crc16(0, buffer, buflen);
+	if (crc) {
+		dev_warn_ratelimited(&applespi->spi->dev,
+				     "Received corrupted packet (crc mismatch)\n");
+		trace_applespi_bad_crc(ET_RD_CRC, PT_READ, buffer, buflen);
+		return false;
+	}
+
+	return true;
+}
+
+/* Trace the raw rx packet under the event type matching its flags/device */
+static void applespi_debug_print_read_packet(struct applespi_data *applespi,
+					     struct spi_packet *packet)
+{
+	unsigned int evt_type = ET_RD_UNKN;
+
+	if (packet->flags == PACKET_TYPE_WRITE) {
+		evt_type = applespi->cmd_evt_type;
+	} else if (packet->flags == PACKET_TYPE_READ) {
+		if (packet->device == PACKET_DEV_KEYB)
+			evt_type = ET_RD_KEYB;
+		else if (packet->device == PACKET_DEV_TPAD)
+			evt_type = ET_RD_TPAD;
+	}
+
+	/* pick the tracepoint for this type and dump the whole packet */
+	applespi_get_trace_fun(evt_type)(evt_type, PT_READ, applespi->rx_buffer,
+					 APPLESPI_PACKET_SIZE);
+}
+
+static void applespi_got_data(struct applespi_data *applespi)
+{
+	struct spi_packet *packet;
+	struct message *message;
+	unsigned int msg_len;
+	unsigned int off;
+	unsigned int rem;
+	unsigned int len;
+
+	/* the whole packet must pass the CRC before its header is used */
+	if (!applespi_verify_crc(applespi, applespi->rx_buffer,
+				 APPLESPI_PACKET_SIZE)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+		if (applespi->drain) {
+			applespi->read_active = false;
+			applespi->write_active = false;
+
+			wake_up_all(&applespi->drain_complete);
+		}
+
+		spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+		return;
+	}
+
+	packet = (struct spi_packet *)applespi->rx_buffer;
+
+	applespi_debug_print_read_packet(applespi, packet);
+
+	off = le16_to_cpu(packet->offset);
+	rem = le16_to_cpu(packet->remaining);
+	len = le16_to_cpu(packet->length);
+
+	if (len > sizeof(packet->data)) {
+		dev_warn_ratelimited(&applespi->spi->dev,
+				     "Received corrupted packet (invalid packet length %u)\n",
+				     len);
+		goto msg_complete;
+	}
+
+	/* handle multi-packet messages: append to msg_buf until rem == 0 */
+	if (rem > 0 || off > 0) {
+		if (off != applespi->saved_msg_len) {
+			dev_warn_ratelimited(&applespi->spi->dev,
+					     "Received unexpected offset (got %u, expected %u)\n",
+					     off, applespi->saved_msg_len);
+			goto msg_complete;
+		}
+
+		if (off + rem > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE) {
+			dev_warn_ratelimited(&applespi->spi->dev,
+					     "Received message too large (size %u)\n",
+					     off + rem);
+			goto msg_complete;
+		}
+
+		if (off + len > MAX_PKTS_PER_MSG * APPLESPI_PACKET_SIZE) {
+			dev_warn_ratelimited(&applespi->spi->dev,
+					     "Received message too large (size %u)\n",
+					     off + len);
+			goto msg_complete;
+		}
+
+		memcpy(applespi->msg_buf + off, &packet->data, len);
+		applespi->saved_msg_len += len;
+
+		if (rem > 0)
+			return;
+
+		message = (struct message *)applespi->msg_buf;
+		msg_len = applespi->saved_msg_len;
+	} else {
+		message = (struct message *)&packet->data;
+		msg_len = len;
+	}
+
+	/* got complete message - verify */
+	if (!applespi_verify_crc(applespi, (u8 *)message, msg_len))
+		goto msg_complete;
+
+	if (le16_to_cpu(message->length) != msg_len - MSG_HEADER_SIZE - 2) {
+		dev_warn_ratelimited(&applespi->spi->dev,
+				     "Received corrupted packet (invalid message length %u - expected %u)\n",
+				     le16_to_cpu(message->length),
+				     msg_len - MSG_HEADER_SIZE - 2);
+		goto msg_complete;
+	}
+
+	/* dispatch on packet type and source device */
+	if (packet->flags == PACKET_TYPE_READ &&
+	    packet->device == PACKET_DEV_KEYB) {
+		applespi_handle_keyboard_event(applespi, &message->keyboard);
+
+	} else if (packet->flags == PACKET_TYPE_READ &&
+		   packet->device == PACKET_DEV_TPAD) {
+		struct touchpad_protocol *tp;
+		size_t tp_len;
+
+		tp = &message->touchpad;
+		tp_len = sizeof(*tp) +
+			 tp->number_of_fingers * sizeof(tp->fingers[0]);
+
+		if (le16_to_cpu(message->length) + 2 != tp_len) {
+			dev_warn_ratelimited(&applespi->spi->dev,
+					     "Received corrupted packet (invalid message length %u - num-fingers %u, tp-len %zu)\n",
+					     le16_to_cpu(message->length),
+					     tp->number_of_fingers, tp_len);
+			goto msg_complete;
+		}
+
+		if (tp->number_of_fingers > MAX_FINGERS) {
+			dev_warn_ratelimited(&applespi->spi->dev,
+					     "Number of reported fingers (%u) exceeds max (%u)\n",
+					     tp->number_of_fingers,
+					     MAX_FINGERS);
+			tp->number_of_fingers = MAX_FINGERS;
+		}
+
+		report_tp_state(applespi, tp);
+
+	} else if (packet->flags == PACKET_TYPE_WRITE) {
+		applespi_handle_cmd_response(applespi, packet, message);
+	}
+
+msg_complete:
+	applespi->saved_msg_len = 0;	/* restart message reassembly */
+
+	applespi_msg_complete(applespi, packet->flags == PACKET_TYPE_WRITE,
+			      true);
+}
+
+/* Completion callback for the read message queued by applespi_notify() */
+static void applespi_async_read_complete(void *context)
+{
+	struct applespi_data *applespi = context;
+
+	if (applespi->rd_m.status >= 0) {
+		applespi_got_data(applespi);
+	} else {
+		dev_warn(&applespi->spi->dev, "Error reading from device: %d\n",
+			 applespi->rd_m.status);
+		/*
+		 * A pure read cannot be told apart from a write response here;
+		 * clear both flags so that this rare error cannot deadlock us.
+		 */
+		applespi_msg_complete(applespi, true, true);
+	}
+
+	acpi_finish_gpe(NULL, applespi->gpe);
+}
+
+static u32 applespi_notify(acpi_handle gpe_device, u32 gpe, void *context)
+{
+	struct applespi_data *applespi = context;
+	int sts;
+	unsigned long flags;
+
+	trace_applespi_irq_received(ET_RD_IRQ, PT_READ);	/* GPE fired */
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+	if (!applespi->suspended) {	/* skip reads when suspended */
+		sts = applespi_async(applespi, &applespi->rd_m,
+				     applespi_async_read_complete);
+		if (sts)
+			dev_warn(&applespi->spi->dev,
+				 "Error queueing async read to device: %d\n",
+				 sts);
+		else
+			applespi->read_active = true;
+	}
+
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+	return ACPI_INTERRUPT_HANDLED;
+}
+
+static int applespi_get_saved_bl_level(struct applespi_data *applespi)
+{
+	struct efivar_entry *efivar_entry;
+	u16 efi_data = 0;
+	unsigned long efi_data_len;
+	int sts;
+
+	efivar_entry = kmalloc(sizeof(*efivar_entry), GFP_KERNEL);
+	if (!efivar_entry)
+		return -ENOMEM;
+
+	memcpy(efivar_entry->var.VariableName, EFI_BL_LEVEL_NAME,
+	       sizeof(EFI_BL_LEVEL_NAME));
+	efivar_entry->var.VendorGuid = EFI_BL_LEVEL_GUID;
+	efi_data_len = sizeof(efi_data);
+
+	sts = efivar_entry_get(efivar_entry, NULL, &efi_data_len, &efi_data);
+	if (sts && sts != -ENOENT)
+		dev_warn(&applespi->spi->dev,
+			 "Error getting backlight level from EFI vars: %d\n",
+			 sts);
+
+	kfree(efivar_entry);
+
+	return sts ? sts : efi_data;
+}
+
+/*
+ * Store the keyboard backlight @level in the EFI variable identified by
+ * EFI_BL_LEVEL_NAME and EFI_BL_LEVEL_GUID; a failure is only logged.
+ */
+static void applespi_save_bl_level(struct applespi_data *applespi,
+				   unsigned int level)
+{
+	efi_guid_t guid = EFI_BL_LEVEL_GUID;
+	u16 data = (u16)level;
+	u32 attrs;
+	int sts;
+
+	/* non-volatile, accessible to boot services and at runtime */
+	attrs = EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
+		EFI_VARIABLE_RUNTIME_ACCESS;
+
+	sts = efivar_entry_set_safe(EFI_BL_LEVEL_NAME, guid, attrs, true,
+				    sizeof(data), &data);
+	if (sts)
+		dev_warn(&applespi->spi->dev,
+			 "Error saving backlight level to EFI vars: %d\n", sts);
+}
+
+static int applespi_probe(struct spi_device *spi)
+{
+	struct applespi_data *applespi;
+	acpi_handle spi_handle = ACPI_HANDLE(&spi->dev);
+	acpi_status acpi_sts;
+	int sts, i;
+	unsigned long long gpe, usb_status;
+
+	/* check if the USB interface is present and enabled already */
+	acpi_sts = acpi_evaluate_integer(spi_handle, "UIST", NULL, &usb_status);
+	if (ACPI_SUCCESS(acpi_sts) && usb_status) {
+		/* let the USB driver take over instead */
+		dev_info(&spi->dev, "USB interface already enabled\n");
+		return -ENODEV;
+	}
+
+	/* allocate driver data */
+	applespi = devm_kzalloc(&spi->dev, sizeof(*applespi), GFP_KERNEL);
+	if (!applespi)
+		return -ENOMEM;
+
+	applespi->spi = spi;
+
+	INIT_WORK(&applespi->work, applespi_worker);
+
+	/* store the driver data */
+	spi_set_drvdata(spi, applespi);
+
+	/* create our buffers (device-managed allocations) */
+	applespi->tx_buffer = devm_kmalloc(&spi->dev, APPLESPI_PACKET_SIZE,
+					   GFP_KERNEL);
+	applespi->tx_status = devm_kmalloc(&spi->dev, APPLESPI_STATUS_SIZE,
+					   GFP_KERNEL);
+	applespi->rx_buffer = devm_kmalloc(&spi->dev, APPLESPI_PACKET_SIZE,
+					   GFP_KERNEL);
+	applespi->msg_buf = devm_kmalloc_array(&spi->dev, MAX_PKTS_PER_MSG,
+					       APPLESPI_PACKET_SIZE,
+					       GFP_KERNEL);
+
+	if (!applespi->tx_buffer || !applespi->tx_status ||
+	    !applespi->rx_buffer || !applespi->msg_buf)
+		return -ENOMEM;
+
+	/* set up our spi messages */
+	applespi_setup_read_txfrs(applespi);
+	applespi_setup_write_txfrs(applespi);
+
+	/* cache ACPI method handles */
+	acpi_sts = acpi_get_handle(spi_handle, "SIEN", &applespi->sien);
+	if (ACPI_FAILURE(acpi_sts)) {
+		dev_err(&applespi->spi->dev,
+			"Failed to get SIEN ACPI method handle: %s\n",
+			acpi_format_exception(acpi_sts));
+		return -ENODEV;
+	}
+
+	acpi_sts = acpi_get_handle(spi_handle, "SIST", &applespi->sist);
+	if (ACPI_FAILURE(acpi_sts)) {
+		dev_err(&applespi->spi->dev,
+			"Failed to get SIST ACPI method handle: %s\n",
+			acpi_format_exception(acpi_sts));
+		return -ENODEV;
+	}
+
+	/* switch on the SPI interface */
+	sts = applespi_setup_spi(applespi);
+	if (sts)
+		return sts;
+
+	sts = applespi_enable_spi(applespi);
+	if (sts)
+		return sts;
+
+	/* setup the keyboard input dev */
+	applespi->keyboard_input_dev = devm_input_allocate_device(&spi->dev);
+
+	if (!applespi->keyboard_input_dev)
+		return -ENOMEM;
+
+	applespi->keyboard_input_dev->name = "Apple SPI Keyboard";
+	applespi->keyboard_input_dev->phys = "applespi/input0";
+	applespi->keyboard_input_dev->dev.parent = &spi->dev;
+	applespi->keyboard_input_dev->id.bustype = BUS_SPI;
+
+	applespi->keyboard_input_dev->evbit[0] =
+			BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) | BIT_MASK(EV_REP);
+	applespi->keyboard_input_dev->ledbit[0] = BIT_MASK(LED_CAPSL);
+
+	input_set_drvdata(applespi->keyboard_input_dev, applespi);
+	applespi->keyboard_input_dev->event = applespi_event;
+
+	for (i = 0; i < ARRAY_SIZE(applespi_scancodes); i++)
+		if (applespi_scancodes[i])
+			input_set_capability(applespi->keyboard_input_dev,
+					     EV_KEY, applespi_scancodes[i]);
+
+	for (i = 0; i < ARRAY_SIZE(applespi_controlcodes); i++)
+		if (applespi_controlcodes[i])
+			input_set_capability(applespi->keyboard_input_dev,
+					     EV_KEY, applespi_controlcodes[i]);
+
+	for (i = 0; i < ARRAY_SIZE(applespi_fn_codes); i++)
+		if (applespi_fn_codes[i].to)
+			input_set_capability(applespi->keyboard_input_dev,
+					     EV_KEY, applespi_fn_codes[i].to);
+
+	input_set_capability(applespi->keyboard_input_dev, EV_KEY, KEY_FN);
+
+	sts = input_register_device(applespi->keyboard_input_dev);
+	if (sts) {
+		dev_err(&applespi->spi->dev,
+			"Unable to register keyboard input device (%d)\n", sts);
+		return -ENODEV;	/* NOTE(review): sts is dropped */
+	}
+
+	/*
+	 * The applespi device doesn't send interrupts normally (as is described
+	 * in its DSDT), but rather seems to use ACPI GPEs.
+	 */
+	acpi_sts = acpi_evaluate_integer(spi_handle, "_GPE", NULL, &gpe);
+	if (ACPI_FAILURE(acpi_sts)) {
+		dev_err(&applespi->spi->dev,
+			"Failed to obtain GPE for SPI slave device: %s\n",
+			acpi_format_exception(acpi_sts));
+		return -ENODEV;
+	}
+	applespi->gpe = (int)gpe;
+
+	acpi_sts = acpi_install_gpe_handler(NULL, applespi->gpe,
+					    ACPI_GPE_LEVEL_TRIGGERED,
+					    applespi_notify, applespi);
+	if (ACPI_FAILURE(acpi_sts)) {
+		dev_err(&applespi->spi->dev,
+			"Failed to install GPE handler for GPE %d: %s\n",
+			applespi->gpe, acpi_format_exception(acpi_sts));
+		return -ENODEV;
+	}
+
+	applespi->suspended = false;
+
+	acpi_sts = acpi_enable_gpe(NULL, applespi->gpe);
+	if (ACPI_FAILURE(acpi_sts)) {
+		dev_err(&applespi->spi->dev,
+			"Failed to enable GPE handler for GPE %d: %s\n",
+			applespi->gpe, acpi_format_exception(acpi_sts));
+		acpi_remove_gpe_handler(NULL, applespi->gpe, applespi_notify);
+		return -ENODEV;
+	}
+
+	/* trigger touchpad setup */
+	applespi_init(applespi, false);
+
+	/*
+	 * By default this device is not enabled for wakeup; but USB keyboards
+	 * generally are, so the expectation is that by default the keyboard
+	 * will wake the system.
+	 */
+	device_wakeup_enable(&spi->dev);
+
+	/* set up keyboard-backlight */
+	sts = applespi_get_saved_bl_level(applespi);
+	if (sts >= 0)	/* a level was saved in EFI */
+		applespi_set_bl_level(&applespi->backlight_info, sts);
+
+	applespi->backlight_info.name            = "spi::kbd_backlight";
+	applespi->backlight_info.default_trigger = "kbd-backlight";
+	applespi->backlight_info.brightness_set  = applespi_set_bl_level;
+
+	sts = devm_led_classdev_register(&spi->dev, &applespi->backlight_info);
+	if (sts)
+		dev_warn(&applespi->spi->dev,
+			 "Unable to register keyboard backlight class dev (%d)\n",
+			 sts);
+
+	/* set up debugfs entries for touchpad dimensions logging */
+	applespi->debugfs_root = debugfs_create_dir("applespi", NULL);
+	if (IS_ERR(applespi->debugfs_root)) {
+		if (PTR_ERR(applespi->debugfs_root) != -ENODEV)
+			dev_warn(&applespi->spi->dev,
+				 "Error creating debugfs root entry (%ld)\n",
+				 PTR_ERR(applespi->debugfs_root));
+	} else {
+		struct dentry *ret;
+
+		ret = debugfs_create_bool("enable_tp_dim", 0600,
+					  applespi->debugfs_root,
+					  &applespi->debug_tp_dim);
+		if (IS_ERR(ret))
+			dev_dbg(&applespi->spi->dev,
+				"Error creating debugfs entry enable_tp_dim (%ld)\n",
+				PTR_ERR(ret));
+
+		ret = debugfs_create_file("tp_dim", 0400,
+					  applespi->debugfs_root, applespi,
+					  &applespi_tp_dim_fops);
+		if (IS_ERR(ret))
+			dev_dbg(&applespi->spi->dev,
+				"Error creating debugfs entry tp_dim (%ld)\n",
+				PTR_ERR(ret));
+	}
+
+	return 0;
+}
+
+static void applespi_drain_writes(struct applespi_data *applespi)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+	applespi->drain = true;	/* cleared again in applespi_resume() */
+	wait_event_lock_irq(applespi->drain_complete, !applespi->write_active,
+			    applespi->cmd_msg_lock);
+
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static void applespi_drain_reads(struct applespi_data *applespi)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+	wait_event_lock_irq(applespi->drain_complete, !applespi->read_active,
+			    applespi->cmd_msg_lock);
+
+	applespi->suspended = true;	/* checked by applespi_notify() */
+
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+}
+
+static int applespi_remove(struct spi_device *spi)
+{
+	struct applespi_data *applespi = spi_get_drvdata(spi);
+
+	applespi_drain_writes(applespi);	/* wait for active writes */
+
+	acpi_disable_gpe(NULL, applespi->gpe);
+	acpi_remove_gpe_handler(NULL, applespi->gpe, applespi_notify);
+	device_wakeup_disable(&spi->dev);
+
+	applespi_drain_reads(applespi);		/* wait for active reads */
+
+	debugfs_remove_recursive(applespi->debugfs_root);
+
+	return 0;
+}
+
+static void applespi_shutdown(struct spi_device *spi)
+{
+	struct applespi_data *applespi = spi_get_drvdata(spi);
+	/* store the tracked backlight level (have_bl_level) in EFI */
+	applespi_save_bl_level(applespi, applespi->have_bl_level);
+}
+
+static int applespi_poweroff_late(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct applespi_data *applespi = spi_get_drvdata(spi);
+
+	applespi_save_bl_level(applespi, applespi->have_bl_level);
+
+	return 0;	/* a failed save is only logged */
+}
+
+static int __maybe_unused applespi_suspend(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct applespi_data *applespi = spi_get_drvdata(spi);
+	acpi_status acpi_sts;
+	int sts;
+
+	/* turn off caps-lock - it'll stay on otherwise */
+	sts = applespi_set_capsl_led(applespi, false);
+	if (sts)
+		dev_warn(&applespi->spi->dev,
+			 "Failed to turn off caps-lock led (%d)\n", sts);
+
+	applespi_drain_writes(applespi);
+
+	/* disable the GPE that triggers our reads */
+	acpi_sts = acpi_disable_gpe(NULL, applespi->gpe);
+	if (ACPI_FAILURE(acpi_sts))
+		dev_err(&applespi->spi->dev,
+			"Failed to disable GPE handler for GPE %d: %s\n",
+			applespi->gpe, acpi_format_exception(acpi_sts));
+
+	applespi_drain_reads(applespi);
+
+	return 0;	/* failures above are only logged */
+}
+
+static int __maybe_unused applespi_resume(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	struct applespi_data *applespi = spi_get_drvdata(spi);
+	acpi_status acpi_sts;
+	unsigned long flags;
+
+	/* ensure our flags and state reflect a newly resumed device */
+	spin_lock_irqsave(&applespi->cmd_msg_lock, flags);
+
+	applespi->drain = false;
+	applespi->have_cl_led_on = false;
+	applespi->have_bl_level = 0;
+	applespi->cmd_msg_queued = false;
+	applespi->read_active = false;
+	applespi->write_active = false;
+
+	applespi->suspended = false;
+
+	spin_unlock_irqrestore(&applespi->cmd_msg_lock, flags);
+
+	/* switch on the SPI interface */
+	applespi_enable_spi(applespi);	/* NOTE(review): result ignored */
+
+	/* re-enable the interrupt */
+	acpi_sts = acpi_enable_gpe(NULL, applespi->gpe);
+	if (ACPI_FAILURE(acpi_sts))
+		dev_err(&applespi->spi->dev,
+			"Failed to re-enable GPE handler for GPE %d: %s\n",
+			applespi->gpe, acpi_format_exception(acpi_sts));
+
+	/* switch the touchpad into multitouch mode */
+	applespi_init(applespi, true);
+
+	return 0;
+}
+
+static const struct acpi_device_id applespi_acpi_match[] = {
+	{ "APP000D", 0 },	/* the ACPI ID this driver matches */
+	{ }			/* terminating empty entry */
+};
+MODULE_DEVICE_TABLE(acpi, applespi_acpi_match);
+
+static const struct dev_pm_ops applespi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(applespi_suspend, applespi_resume)
+	.poweroff_late	= applespi_poweroff_late,	/* saves backlight */
+};
+
+static struct spi_driver applespi_driver = {
+	.driver		= {
+		.name			= "applespi",
+		.acpi_match_table	= applespi_acpi_match,
+		.pm			= &applespi_pm_ops,
+	},
+	.probe		= applespi_probe,
+	.remove		= applespi_remove,
+	.shutdown	= applespi_shutdown,	/* saves the backlight level */
+};
+
+module_spi_driver(applespi_driver)	/* module init/exit boilerplate */
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MacBook(Pro) SPI Keyboard/Touchpad driver");
+MODULE_AUTHOR("Federico Lorenzi");
+MODULE_AUTHOR("Ronald Tschalär");
diff --git a/drivers/input/keyboard/applespi.h b/drivers/input/keyboard/applespi.h
new file mode 100644
index 000000000000..7f5ab10c597a
--- /dev/null
+++ b/drivers/input/keyboard/applespi.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2019 Federico Lorenzi
+ * Copyright (c) 2017-2019 Ronald Tschalär
+ */
+
+#ifndef _APPLESPI_H_
+#define _APPLESPI_H_
+
+enum applespi_evt_type {
+	ET_CMD_TP_INI = BIT(0),	/* ET_CMD_*: presumably commands */
+	ET_CMD_BL = BIT(1),
+	ET_CMD_CL = BIT(2),
+	ET_RD_KEYB = BIT(8),	/* read: keyboard packet */
+	ET_RD_TPAD = BIT(9),	/* read: touchpad packet */
+	ET_RD_UNKN = BIT(10),	/* read: any other packet */
+	ET_RD_IRQ = BIT(11),	/* GPE notification received */
+	ET_RD_CRC = BIT(12),	/* packet with a CRC mismatch */
+};
+
+enum applespi_pkt_type {
+	PT_READ,	/* used when tracing received packets */
+	PT_WRITE,
+	PT_STATUS,
+};
+
+#endif /* _APPLESPI_H_ */
diff --git a/drivers/input/keyboard/applespi_trace.h b/drivers/input/keyboard/applespi_trace.h
new file mode 100644
index 000000000000..0ad1a3d79f50
--- /dev/null
+++ b/drivers/input/keyboard/applespi_trace.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MacBook (Pro) SPI keyboard and touchpad driver
+ *
+ * Copyright (c) 2015-2019 Federico Lorenzi
+ * Copyright (c) 2017-2019 Ronald Tschalär
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM applespi
+
+#if !defined(_APPLESPI_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _APPLESPI_TRACE_H_
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include "applespi.h"
+
+DECLARE_EVENT_CLASS(dump_message_template,	/* base of all packet dumps */
+	TP_PROTO(enum applespi_evt_type evt_type,
+		 enum applespi_pkt_type pkt_type,
+		 u8 *buf,
+		 size_t len),
+
+	TP_ARGS(evt_type, pkt_type, buf, len),
+
+	TP_STRUCT__entry(
+		__field(enum applespi_evt_type, evt_type)
+		__field(enum applespi_pkt_type, pkt_type)
+		__field(size_t, len)
+		__dynamic_array(u8, buf, len)	/* copy of the packet */
+	),
+
+	TP_fast_assign(
+		__entry->evt_type = evt_type;
+		__entry->pkt_type = pkt_type;
+		__entry->len = len;
+		memcpy(__get_dynamic_array(buf), buf, len);
+	),
+
+	TP_printk("%-6s: %s",	/* "<packet type>: <hex dump>" */
+		  __print_symbolic(__entry->pkt_type,
+				   { PT_READ, "read" },
+				   { PT_WRITE, "write" },
+				   { PT_STATUS, "status" }
+		  ),
+		  __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+#define DEFINE_DUMP_MESSAGE_EVENT(name)			\
+DEFINE_EVENT(dump_message_template, name,		\
+	TP_PROTO(enum applespi_evt_type evt_type,	\
+		 enum applespi_pkt_type pkt_type,	\
+		 u8 *buf,				\
+		 size_t len),				\
+	TP_ARGS(evt_type, pkt_type, buf, len)		\
+)
+
+DEFINE_DUMP_MESSAGE_EVENT(applespi_tp_ini_cmd);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_backlight_cmd);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_caps_lock_cmd);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_keyboard_data);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_touchpad_data);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_unknown_data);
+DEFINE_DUMP_MESSAGE_EVENT(applespi_bad_crc);	/* CRC check failures */
+
+TRACE_EVENT(applespi_irq_received,	/* emitted by applespi_notify() */
+	TP_PROTO(enum applespi_evt_type evt_type,
+		 enum applespi_pkt_type pkt_type),
+
+	TP_ARGS(evt_type, pkt_type),
+
+	TP_STRUCT__entry(
+		__field(enum applespi_evt_type, evt_type)
+		__field(enum applespi_pkt_type, pkt_type)
+	),
+
+	TP_fast_assign(
+		__entry->evt_type = evt_type;
+		__entry->pkt_type = pkt_type;
+	),
+
+	"\n"	/* NOTE(review): bare format string, not wrapped in TP_printk() */
+);
+
+#endif /* _APPLESPI_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/input/keyboard
+#define TRACE_INCLUDE_FILE applespi_trace
+#include <trace/define_trace.h>
diff --git a/drivers/input/keyboard/mtk-pmic-keys.c b/drivers/input/keyboard/mtk-pmic-keys.c
index 746ff06eaf8d..62391d6c7da6 100644
--- a/drivers/input/keyboard/mtk-pmic-keys.c
+++ b/drivers/input/keyboard/mtk-pmic-keys.c
@@ -277,8 +277,10 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev)
 		keys->keys[index].regs = &mtk_pmic_regs->keys_regs[index];
 
 		keys->keys[index].irq = platform_get_irq(pdev, index);
-		if (keys->keys[index].irq < 0)
+		if (keys->keys[index].irq < 0) {
+			of_node_put(child);
 			return keys->keys[index].irq;
+		}
 
 		error = of_property_read_u32(child,
 			"linux,keycodes", &keys->keys[index].keycode);
@@ -286,6 +288,7 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev)
 			dev_err(keys->dev,
 				"failed to read key:%d linux,keycode property: %d\n",
 				index, error);
+			of_node_put(child);
 			return error;
 		}
 
@@ -293,8 +296,10 @@ static int mtk_pmic_keys_probe(struct platform_device *pdev)
 			keys->keys[index].wakeup = true;
 
 		error = mtk_pmic_key_setup(keys, &keys->keys[index]);
-		if (error)
+		if (error) {
+			of_node_put(child);
 			return error;
+		}
 
 		index++;
 	}
diff --git a/drivers/input/keyboard/sun4i-lradc-keys.c b/drivers/input/keyboard/sun4i-lradc-keys.c
index 6ffdc26b9c89..4a796bed48ac 100644
--- a/drivers/input/keyboard/sun4i-lradc-keys.c
+++ b/drivers/input/keyboard/sun4i-lradc-keys.c
@@ -198,18 +198,21 @@ static int sun4i_lradc_load_dt_keymap(struct device *dev,
 		error = of_property_read_u32(pp, "channel", &channel);
 		if (error || channel != 0) {
 			dev_err(dev, "%pOFn: Inval channel prop\n", pp);
+			of_node_put(pp);
 			return -EINVAL;
 		}
 
 		error = of_property_read_u32(pp, "voltage", &map->voltage);
 		if (error) {
 			dev_err(dev, "%pOFn: Inval voltage prop\n", pp);
+			of_node_put(pp);
 			return -EINVAL;
 		}
 
 		error = of_property_read_u32(pp, "linux,code", &map->keycode);
 		if (error) {
 			dev_err(dev, "%pOFn: Inval linux,code prop\n", pp);
+			of_node_put(pp);
 			return -EINVAL;
 		}
 
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 8996323ce8d9..34700eda0429 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -21,6 +21,7 @@
 
 #include "psmouse.h"
 #include "alps.h"
+#include "trackpoint.h"
 
 /*
  * Definitions for ALPS version 3 and 4 command mode protocol
@@ -2861,6 +2862,23 @@ static const struct alps_protocol_info *alps_match_table(unsigned char *e7,
 	return NULL;
 }
 
+static bool alps_is_cs19_trackpoint(struct psmouse *psmouse)
+{
+	u8 id[2] = { 0 };
+	int rc;
+
+	rc = ps2_command(&psmouse->ps2dev, id, MAKE_PS2_CMD(0, 2, TP_READ_ID));
+	if (rc)
+		return false;
+
+	/*
+	 * id[0] is the trackpoint variant_id and id[1] the firmware_id.
+	 * So far every ALPS trackpoint-only device reports variant_id
+	 * TP_VARIANT_ALPS and a firmware_id in the 0x20~0x2f range.
+	 */
+	return id[0] == TP_VARIANT_ALPS && (id[1] & 0xf0) == 0x20;
+}
+
 static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
 {
 	const struct alps_protocol_info *protocol;
@@ -3162,6 +3180,20 @@ int alps_detect(struct psmouse *psmouse, bool set_properties)
 		return error;
 
 	/*
+	 * ALPS cs19 is a trackpoint-only device and uses a different
+	 * protocol than DualPoint ones, so we return -EINVAL here and let
+	 * trackpoint.c drive this device. If the trackpoint driver is not
+	 * enabled, the device will fall back to a bare PS/2 mouse.
+	 * If ps2_command() fails here, we depend on the psmouse_reset()
+	 * that immediately follows to reset the device to normal state.
+	 */
+	if (alps_is_cs19_trackpoint(psmouse)) {
+		psmouse_dbg(psmouse,
+			    "ALPS CS19 trackpoint-only device detected, ignoring\n");
+		return -EINVAL;
+	}
+
+	/*
 	 * Reset the device to make sure it is fully operational:
 	 * on some laptops, like certain Dell Latitudes, we may
 	 * fail to properly detect presence of trackstick if device
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 1080c0c49815..b1956ed4c0dd 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -176,6 +176,7 @@ static const char * const smbus_pnp_ids[] = {
 	"LEN0093", /* T480 */
 	"LEN0096", /* X280 */
 	"LEN0097", /* X280 -> ALPS trackpoint */
+	"LEN009b", /* T580 */
 	"LEN200f", /* T450s */
 	"LEN2054", /* E480 */
 	"LEN2055", /* E580 */
@@ -705,7 +706,7 @@ static void synaptics_pt_create(struct psmouse *psmouse)
 
 	serio->id.type = SERIO_PS_PSTHRU;
 	strlcpy(serio->name, "Synaptics pass-through", sizeof(serio->name));
-	strlcpy(serio->phys, "synaptics-pt/serio0", sizeof(serio->name));
+	strlcpy(serio->phys, "synaptics-pt/serio0", sizeof(serio->phys));
 	serio->write = synaptics_pt_write;
 	serio->start = synaptics_pt_start;
 	serio->stop = synaptics_pt_stop;
diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h
index 0afffe8d824f..77110f3ec21d 100644
--- a/drivers/input/mouse/trackpoint.h
+++ b/drivers/input/mouse/trackpoint.h
@@ -158,7 +158,8 @@ struct trackpoint_data {
 #ifdef CONFIG_MOUSE_PS2_TRACKPOINT
 int trackpoint_detect(struct psmouse *psmouse, bool set_properties);
 #else
-inline int trackpoint_detect(struct psmouse *psmouse, bool set_properties)
+static inline int trackpoint_detect(struct psmouse *psmouse,
+				    bool set_properties)
 {
 	return -ENOSYS;
 }
diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c
index 8e457e50f837..88ae7c2ac3c8 100644
--- a/drivers/input/serio/hyperv-keyboard.c
+++ b/drivers/input/serio/hyperv-keyboard.c
@@ -75,8 +75,8 @@ struct synth_kbd_keystroke {
 
 #define HK_MAXIMUM_MESSAGE_SIZE 256
 
-#define KBD_VSC_SEND_RING_BUFFER_SIZE		(10 * PAGE_SIZE)
-#define KBD_VSC_RECV_RING_BUFFER_SIZE		(10 * PAGE_SIZE)
+#define KBD_VSC_SEND_RING_BUFFER_SIZE		(40 * 1024)
+#define KBD_VSC_RECV_RING_BUFFER_SIZE		(40 * 1024)
 
 #define XTKBD_EMUL0     0xe0
 #define XTKBD_EMUL1     0xe1
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index 4b8b9d7aa75e..35031228a6d0 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -78,6 +78,7 @@ Scott Hill shill@gtcocalcomp.com
 
 /* Max size of a single report */
 #define REPORT_MAX_SIZE       10
+#define MAX_COLLECTION_LEVELS  10
 
 
 /* Bitmask whether pen is in range */
@@ -223,8 +224,7 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report,
 	char  maintype = 'x';
 	char  globtype[12];
 	int   indent = 0;
-	char  indentstr[10] = "";
-
+	char  indentstr[MAX_COLLECTION_LEVELS + 1] = { 0 };
 
 	dev_dbg(ddev, "======>>>>>>PARSE<<<<<<======\n");
 
@@ -350,6 +350,13 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report,
 			case TAG_MAIN_COL_START:
 				maintype = 'S';
 
+				if (indent == MAX_COLLECTION_LEVELS) {
+					dev_err(ddev, "Collection level %d would exceed limit of %d\n",
+						indent + 1,
+						MAX_COLLECTION_LEVELS);
+					break;
+				}
+
 				if (data == 0) {
 					dev_dbg(ddev, "======>>>>>> Physical\n");
 					strcpy(globtype, "Physical");
@@ -369,8 +376,15 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report,
 				break;
 
 			case TAG_MAIN_COL_END:
-				dev_dbg(ddev, "<<<<<<======\n");
 				maintype = 'E';
+
+				if (indent == 0) {
+					dev_err(ddev, "Collection level already at zero\n");
+					break;
+				}
+
+				dev_dbg(ddev, "<<<<<<======\n");
+
 				indent--;
 				for (x = 0; x < indent; x++)
 					indentstr[x] = '-';
diff --git a/drivers/input/touchscreen/auo-pixcir-ts.c b/drivers/input/touchscreen/auo-pixcir-ts.c
index 8e48fbda487a..8e9f3b7b8180 100644
--- a/drivers/input/touchscreen/auo-pixcir-ts.c
+++ b/drivers/input/touchscreen/auo-pixcir-ts.c
@@ -602,9 +602,8 @@ static int auo_pixcir_probe(struct i2c_client *client,
 		return error;
 	}
 
-	error = devm_add_action(&client->dev, auo_pixcir_reset, ts);
+	error = devm_add_action_or_reset(&client->dev, auo_pixcir_reset, ts);
 	if (error) {
-		auo_pixcir_reset(ts);
 		dev_err(&client->dev, "failed to register reset action, %d\n",
 			error);
 		return error;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 73740b969e62..b607a92791d3 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2533,7 +2533,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 	npages = sg_num_pages(dev, sglist, nelems);
 
 	address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
-	if (address == DMA_MAPPING_ERROR)
+	if (!address)
 		goto out_err;
 
 	prot = dir2prot(direction);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index eb104c719629..4413aa67000e 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -23,6 +23,8 @@
 #include <linux/mem_encrypt.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
+#include <asm/apic.h>
+#include <asm/msidef.h>
 #include <asm/gart.h>
 #include <asm/x86_init.h>
 #include <asm/iommu_table.h>
@@ -1920,6 +1922,90 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
 	return 0;
 }
 
+#define XT_INT_DEST_MODE(x)	(((x) & 0x1ULL) << 2)
+#define XT_INT_DEST_LO(x)	(((x) & 0xFFFFFFULL) << 8)
+#define XT_INT_VEC(x)		(((x) & 0xFFULL) << 32)
+#define XT_INT_DEST_HI(x)	((((x) >> 24) & 0xFFULL) << 56)
+
+/*
+ * Set up the IntCapXT registers with interrupt routing information
+ * based on the PCI MSI capability block registers, accessed via the
+ * MMIO MSI address low/hi and MSI data registers.
+ */
+static void iommu_update_intcapxt(struct amd_iommu *iommu)
+{
+	u64 val;
+	u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
+	u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
+	u32 data    = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
+	bool dm     = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+	u32 dest    = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
+
+	if (x2apic_enabled())
+		dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
+
+	val = XT_INT_VEC(data & 0xFF) |
+	      XT_INT_DEST_MODE(dm) |
+	      XT_INT_DEST_LO(dest) |
+	      XT_INT_DEST_HI(dest);
+
+	/*
+	 * Current IOMMU implementation uses the same IRQ for all
+	 * 3 IOMMU interrupts, so all three registers get the same value.
+	 */
+	writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+	writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+	writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+}
+
+static void _irq_notifier_notify(struct irq_affinity_notify *notify,
+				 const cpumask_t *mask)
+{
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu) {
+		if (iommu->dev->irq != notify->irq)
+			continue;
+		iommu_update_intcapxt(iommu);	/* rewrite IntCapXT regs */
+		break;				/* first match only */
+	}
+}
+
+static void _irq_notifier_release(struct kref *ref)
+{
+}
+
+/*
+ * Enable IntCapXT when x2APIC interrupt remapping is in use: register an
+ * irq affinity notifier, program the IntCapXT registers and set
+ * CONTROL_INTCAPXT_EN. Returns 0 on success or when there is nothing to
+ * do, otherwise the error from irq_set_affinity_notifier().
+ */
+static int iommu_init_intcapxt(struct amd_iommu *iommu)
+{
+	int ret;
+	struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
+
+	/* IntCapXT requires XTSup=1, inferred from amd_iommu_xt_mode. */
+	if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
+		return 0;
+
+	/* Rewrite IntCapXT whenever user-space changes the irq affinity. */
+	notify->irq = iommu->dev->irq;
+	notify->notify = _irq_notifier_notify;
+	notify->release = _irq_notifier_release;
+	ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+	if (ret) {
+		pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
+		       iommu->devid, iommu->dev->irq);
+		return ret;
+	}
+
+	iommu_update_intcapxt(iommu);
+	iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
+	return ret;
+}
+
 static int iommu_init_msi(struct amd_iommu *iommu)
 {
 	int ret;
@@ -1936,6 +2022,10 @@ static int iommu_init_msi(struct amd_iommu *iommu)
 		return ret;
 
 enable_faults:
+	ret = iommu_init_intcapxt(iommu);
+	if (ret)
+		return ret;
+
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
 
 	if (iommu->ppr_log != NULL)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 52c35d557fad..64edd5a9694c 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -60,6 +60,12 @@
 #define MMIO_PPR_LOG_OFFSET	0x0038
 #define MMIO_GA_LOG_BASE_OFFSET	0x00e0
 #define MMIO_GA_LOG_TAIL_OFFSET	0x00e8
+#define MMIO_MSI_ADDR_LO_OFFSET	0x015C
+#define MMIO_MSI_ADDR_HI_OFFSET	0x0160
+#define MMIO_MSI_DATA_OFFSET	0x0164
+#define MMIO_INTCAPXT_EVT_OFFSET	0x0170
+#define MMIO_INTCAPXT_PPR_OFFSET	0x0178
+#define MMIO_INTCAPXT_GALOG_OFFSET	0x0180
 #define MMIO_CMD_HEAD_OFFSET	0x2000
 #define MMIO_CMD_TAIL_OFFSET	0x2008
 #define MMIO_EVT_HEAD_OFFSET	0x2010
@@ -150,6 +156,7 @@
 #define CONTROL_GALOG_EN        0x1CULL
 #define CONTROL_GAINT_EN        0x1DULL
 #define CONTROL_XT_EN           0x32ULL
+#define CONTROL_INTCAPXT_EN     0x33ULL
 
 #define CTRL_INV_TO_MASK	(7 << CONTROL_INV_TIMEOUT)
 #define CTRL_INV_TO_NONE	0
@@ -592,6 +599,8 @@ struct amd_iommu {
 	/* DebugFS Info */
 	struct dentry *debugfs;
 #endif
+	/* IRQ notifier for IntCapXT interrupt */
+	struct irq_affinity_notify intcapxt_notify;
 };
 
 static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index 73a552914455..2b25d9c59336 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -162,9 +162,9 @@ static inline void print_tbl_walk(struct seq_file *m)
 			   (u64)0, (u64)0, (u64)0);
 	else
 		seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n",
-			   tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0],
+			   tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[2],
 			   tbl_wlk->pasid_tbl_entry->val[1],
-			   tbl_wlk->pasid_tbl_entry->val[2]);
+			   tbl_wlk->pasid_tbl_entry->val[0]);
 }
 
 static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ac4172c02244..bdaed2da8a55 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -339,8 +339,6 @@ static void domain_exit(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
 static void dmar_remove_one_dev_info(struct device *dev);
 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static void domain_context_clear(struct intel_iommu *iommu,
-				 struct device *dev);
 static int domain_detach_iommu(struct dmar_domain *domain,
 			       struct intel_iommu *iommu);
 static bool device_is_rmrr_locked(struct device *dev);
@@ -1833,9 +1831,65 @@ static inline int guestwidth_to_adjustwidth(int gaw)
 	return agaw;
 }
 
+/*
+ * Initialise @domain for use behind @iommu: IOVA allocator and flush
+ * queue, address widths, capability flags and the top-level page
+ * directory. Returns 0, the flush-queue error, -ENODEV or -ENOMEM.
+ */
+static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
+		       int guest_width)
+{
+	unsigned long sagaw = cap_sagaw(iommu->cap);
+	int agaw;
+	int err;
+
+	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+
+	err = init_iova_flush_queue(&domain->iovad, iommu_flush_iova,
+				    iova_entry_free);
+	if (err)
+		return err;
+
+	domain_reserve_special_ranges(domain);
+
+	/* Clamp the guest width to the IOMMU's MGAW, then derive the AGAW. */
+	if (guest_width > cap_mgaw(iommu->cap))
+		guest_width = cap_mgaw(iommu->cap);
+	domain->gaw = guest_width;
+
+	agaw = width_to_agaw(guestwidth_to_adjustwidth(guest_width));
+	if (!test_bit(agaw, &sagaw)) {
+		/* Not in SAGAW: move up to the next supported value. */
+		pr_debug("Hardware doesn't support agaw %d\n", agaw);
+		agaw = find_next_bit(&sagaw, 5, agaw);
+		if (agaw >= 5)
+			return -ENODEV;
+	}
+	domain->agaw = agaw;
+
+	/*
+	 * Coherency and snooping follow the extended capabilities; the
+	 * superpage level is only taken when intel_iommu_superpage is set.
+	 */
+	domain->iommu_coherency = ecap_coherent(iommu->ecap) ? 1 : 0;
+	domain->iommu_snooping = ecap_sc_support(iommu->ecap) ? 1 : 0;
+	domain->iommu_superpage = intel_iommu_superpage ?
+		fls(cap_super_page_val(iommu->cap)) : 0;
+
+	domain->nid = iommu->node;
+
+	/* The top-level page directory is always allocated here. */
+	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+	if (!domain->pgd)
+		return -ENOMEM;
+
+	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
+
+	return 0;
+}
+
 static void domain_exit(struct dmar_domain *domain)
 {
-	struct page *freelist;
 
 	/* Remove associated devices and clear attached or cached domains */
 	domain_remove_dev_info(domain);
@@ -1843,9 +1897,12 @@ static void domain_exit(struct dmar_domain *domain)
 	/* destroy iovas */
 	put_iova_domain(&domain->iovad);
 
-	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+	if (domain->pgd) {
+		struct page *freelist;
 
-	dma_free_pagelist(freelist);
+		freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+		dma_free_pagelist(freelist);
+	}
 
 	free_domain_mem(domain);
 }
@@ -2048,26 +2105,9 @@ out_unlock:
 	return ret;
 }
 
-struct domain_context_mapping_data {
-	struct dmar_domain *domain;
-	struct intel_iommu *iommu;
-	struct pasid_table *table;
-};
-
-static int domain_context_mapping_cb(struct pci_dev *pdev,
-				     u16 alias, void *opaque)
-{
-	struct domain_context_mapping_data *data = opaque;
-
-	return domain_context_mapping_one(data->domain, data->iommu,
-					  data->table, PCI_BUS_NUM(alias),
-					  alias & 0xff);
-}
-
 static int
 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
 {
-	struct domain_context_mapping_data data;
 	struct pasid_table *table;
 	struct intel_iommu *iommu;
 	u8 bus, devfn;
@@ -2077,17 +2117,7 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
 		return -ENODEV;
 
 	table = intel_pasid_get_table(dev);
-
-	if (!dev_is_pci(dev))
-		return domain_context_mapping_one(domain, iommu, table,
-						  bus, devfn);
-
-	data.domain = domain;
-	data.iommu = iommu;
-	data.table = table;
-
-	return pci_for_each_dma_alias(to_pci_dev(dev),
-				      &domain_context_mapping_cb, &data);
+	return domain_context_mapping_one(domain, iommu, table, bus, devfn);
 }
 
 static int domain_context_mapped_cb(struct pci_dev *pdev,
@@ -2513,31 +2543,6 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
 	return 0;
 }
 
-static int domain_init(struct dmar_domain *domain, int guest_width)
-{
-	int adjust_width;
-
-	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
-	domain_reserve_special_ranges(domain);
-
-	/* calculate AGAW */
-	domain->gaw = guest_width;
-	adjust_width = guestwidth_to_adjustwidth(guest_width);
-	domain->agaw = width_to_agaw(adjust_width);
-
-	domain->iommu_coherency = 0;
-	domain->iommu_snooping = 0;
-	domain->iommu_superpage = 0;
-	domain->max_addr = 0;
-
-	/* always allocate the top pgd */
-	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
-	if (!domain->pgd)
-		return -ENOMEM;
-	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
-	return 0;
-}
-
 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
 {
 	struct device_domain_info *info;
@@ -2575,19 +2580,11 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
 	domain = alloc_domain(0);
 	if (!domain)
 		return NULL;
-
-	if (domain_init(domain, gaw)) {
+	if (domain_init(domain, iommu, gaw)) {
 		domain_exit(domain);
 		return NULL;
 	}
 
-	if (init_iova_flush_queue(&domain->iovad,
-				  iommu_flush_iova,
-				  iova_entry_free)) {
-		pr_warn("iova flush queue initialization failed\n");
-		intel_iommu_strict = 1;
-	}
-
 out:
 	return domain;
 }
@@ -2692,6 +2689,8 @@ static int domain_prepare_identity_map(struct device *dev,
 	return iommu_domain_identity_map(domain, start, end);
 }
 
+static int md_domain_init(struct dmar_domain *domain, int guest_width);
+
 static int __init si_domain_init(int hw)
 {
 	struct dmar_rmrr_unit *rmrr;
@@ -2702,7 +2701,7 @@ static int __init si_domain_init(int hw)
 	if (!si_domain)
 		return -EFAULT;
 
-	if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+	if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 		domain_exit(si_domain);
 		return -EFAULT;
 	}
@@ -3564,7 +3563,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 
 	freelist = domain_unmap(domain, start_pfn, last_pfn);
 
-	if (intel_iommu_strict || (pdev && pdev->untrusted)) {
+	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
+			!has_iova_flush_queue(&domain->iovad)) {
 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
 				      nrpages, !freelist, 0);
 		/* free iova */
@@ -4758,28 +4758,6 @@ out_free_dmar:
 	return ret;
 }
 
-static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
-{
-	struct intel_iommu *iommu = opaque;
-
-	domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
-	return 0;
-}
-
-/*
- * NB - intel-iommu lacks any sort of reference counting for the users of
- * dependent devices.  If multiple endpoints have intersecting dependent
- * devices, unbinding the driver from any one of them will possibly leave
- * the others unable to operate.
- */
-static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
-{
-	if (!iommu || !dev || !dev_is_pci(dev))
-		return;
-
-	pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
-}
-
 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
 {
 	struct dmar_domain *domain;
@@ -4800,7 +4778,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
 					PASID_RID2PASID);
 
 		iommu_disable_dev_iotlb(info);
-		domain_context_clear(iommu, info->dev);
+		domain_context_clear_one(iommu, info->bus, info->devfn);
 		intel_pasid_free_table(info->dev);
 	}
 
@@ -4829,6 +4807,31 @@ static void dmar_remove_one_dev_info(struct device *dev)
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
+/* Set up @domain with fixed defaults; no IOMMU capabilities are read. */
+static int md_domain_init(struct dmar_domain *domain, int guest_width)
+{
+	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+	domain_reserve_special_ranges(domain);
+
+	/* The AGAW is derived straight from the requested guest width. */
+	domain->gaw = guest_width;
+	domain->agaw = width_to_agaw(guestwidth_to_adjustwidth(guest_width));
+
+	/* Capability flags and max_addr all start out as zero. */
+	domain->iommu_coherency = 0;
+	domain->iommu_snooping = 0;
+	domain->iommu_superpage = 0;
+	domain->max_addr = 0;
+
+	/* The top-level page directory is always allocated here. */
+	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+	if (!domain->pgd)
+		return -ENOMEM;
+
+	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+	return 0;
+}
+
 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 {
 	struct dmar_domain *dmar_domain;
@@ -4843,7 +4846,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 			pr_err("Can't allocate dmar_domain\n");
 			return NULL;
 		}
-		if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+		if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
 			pr_err("Domain initialization failed\n");
 			domain_exit(dmar_domain);
 			return NULL;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index d499b2621239..3e1a8a675572 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
 
+bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+	return !!iovad->fq;
+}
+
 static void free_iova_flush_queue(struct iova_domain *iovad)
 {
-	if (!iovad->fq)
+	if (!has_iova_flush_queue(iovad))
 		return;
 
 	if (timer_pending(&iovad->fq_timer))
@@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 int init_iova_flush_queue(struct iova_domain *iovad,
 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
 {
+	struct iova_fq __percpu *queue;
 	int cpu;
 
 	atomic64_set(&iovad->fq_flush_start_cnt,  0);
 	atomic64_set(&iovad->fq_flush_finish_cnt, 0);
 
-	iovad->fq = alloc_percpu(struct iova_fq);
-	if (!iovad->fq)
+	queue = alloc_percpu(struct iova_fq);
+	if (!queue)
 		return -ENOMEM;
 
 	iovad->flush_cb   = flush_cb;
@@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 	for_each_possible_cpu(cpu) {
 		struct iova_fq *fq;
 
-		fq = per_cpu_ptr(iovad->fq, cpu);
+		fq = per_cpu_ptr(queue, cpu);
 		fq->head = 0;
 		fq->tail = 0;
 
 		spin_lock_init(&fq->lock);
 	}
 
+	smp_wmb();
+
+	iovad->fq = queue;
+
 	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
 	atomic_set(&iovad->fq_timer_on, 0);
 
@@ -127,8 +137,9 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 	struct iova *cached_iova;
 
 	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
-	if (free->pfn_hi < iovad->dma_32bit_pfn &&
-	    free->pfn_lo >= cached_iova->pfn_lo) {
+	if (free == cached_iova ||
+	    (free->pfn_hi < iovad->dma_32bit_pfn &&
+	     free->pfn_lo >= cached_iova->pfn_lo)) {
 		iovad->cached32_node = rb_next(&free->node);
 		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 	}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 26e374fbf57c..20ed838e9413 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -931,6 +931,9 @@ int bch_cached_dev_run(struct cached_dev *dc)
 	if (dc->io_disable) {
 		pr_err("I/O disabled on cached dev %s",
 		       dc->backing_dev_name);
+		kfree(env[1]);
+		kfree(env[2]);
+		kfree(buf);
 		return -EIO;
 	}
 
diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c
index 21fb90d66bfc..25c73c13cc7e 100644
--- a/drivers/media/v4l2-core/v4l2-subdev.c
+++ b/drivers/media/v4l2-core/v4l2-subdev.c
@@ -124,7 +124,7 @@ static inline int check_which(__u32 which)
 static inline int check_pad(struct v4l2_subdev *sd, __u32 pad)
 {
 #if defined(CONFIG_MEDIA_CONTROLLER)
-	if (sd->entity.graph_obj.mdev) {
+	if (sd->entity.num_pads) {
 		if (pad >= sd->entity.num_pads)
 			return -EINVAL;
 		return 0;
diff --git a/drivers/memory/.gitignore b/drivers/memory/.gitignore
new file mode 100644
index 000000000000..cbca8b028437
--- /dev/null
+++ b/drivers/memory/.gitignore
@@ -0,0 +1 @@
+ti-emif-asm-offsets.h
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index 9d5c409a1591..27b493435e61 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -29,9 +29,10 @@ ti-emif-sram-objs		:= ti-emif-pm.o ti-emif-sram-pm.o
 
 AFLAGS_ti-emif-sram-pm.o	:=-Wa,-march=armv7-a
 
-drivers/memory/ti-emif-sram-pm.o: include/generated/ti-emif-asm-offsets.h
+$(obj)/ti-emif-sram-pm.o: $(obj)/ti-emif-asm-offsets.h
 
-include/generated/ti-emif-asm-offsets.h: drivers/memory/emif-asm-offsets.s FORCE
+$(obj)/ti-emif-asm-offsets.h: $(obj)/emif-asm-offsets.s FORCE
 	$(call filechk,offsets,__TI_EMIF_ASM_OFFSETS_H__)
 
 targets += emif-asm-offsets.s
+clean-files += ti-emif-asm-offsets.h
diff --git a/drivers/memory/ti-emif-sram-pm.S b/drivers/memory/ti-emif-sram-pm.S
index d75ae18efa7d..d1c83bd5b98e 100644
--- a/drivers/memory/ti-emif-sram-pm.S
+++ b/drivers/memory/ti-emif-sram-pm.S
@@ -14,12 +14,12 @@
  * GNU General Public License for more details.
  */
 
-#include <generated/ti-emif-asm-offsets.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/memory.h>
 
 #include "emif.h"
+#include "ti-emif-asm-offsets.h"
 
 #define EMIF_POWER_MGMT_WAIT_SELF_REFRESH_8192_CYCLES	0x00a0
 #define EMIF_POWER_MGMT_SR_TIMER_MASK			0x00f0
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 656ed80647f0..e2be5a685130 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -285,6 +285,9 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata)
 	hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
 	sw_cons = txdata->tx_pkt_cons;
 
+	/* Ensure subsequent loads occur after hw_cons */
+	smp_rmb();
+
 	while (sw_cons != hw_cons) {
 		u16 pkt_cons;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/my3126.c b/drivers/net/ethernet/chelsio/cxgb/my3126.c
index 20c09cc4b323..60aa45b375b6 100644
--- a/drivers/net/ethernet/chelsio/cxgb/my3126.c
+++ b/drivers/net/ethernet/chelsio/cxgb/my3126.c
@@ -94,7 +94,7 @@ static int my3126_interrupt_handler(struct cphy *cphy)
 	return cphy_cause_link_change;
 }
 
-static void my3216_poll(struct work_struct *work)
+static void my3126_poll(struct work_struct *work)
 {
 	struct cphy *cphy = container_of(work, struct cphy, phy_update.work);
 
@@ -177,7 +177,7 @@ static struct cphy *my3126_phy_create(struct net_device *dev,
 		return NULL;
 
 	cphy_init(cphy, dev, phy_addr, &my3126_ops, mdio_ops);
-	INIT_DELAYED_WORK(&cphy->phy_update, my3216_poll);
+	INIT_DELAYED_WORK(&cphy->phy_update, my3126_poll);
 	cphy->bmsr = 0;
 
 	return cphy;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 67202b6f352e..4311ad9c84b2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5561,7 +5561,6 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 		char name[IFNAMSIZ];
 		u32 devcap2;
 		u16 flags;
-		int pos;
 
 		/* If we want to instantiate Virtual Functions, then our
 		 * parent bridge's PCI-E needs to support Alternative Routing
@@ -5569,9 +5568,8 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 		 * and above.
 		 */
 		pbridge = pdev->bus->self;
-		pos = pci_find_capability(pbridge, PCI_CAP_ID_EXP);
-		pci_read_config_word(pbridge, pos + PCI_EXP_FLAGS, &flags);
-		pci_read_config_dword(pbridge, pos + PCI_EXP_DEVCAP2, &devcap2);
+		pcie_capability_read_word(pbridge, PCI_EXP_FLAGS, &flags);
+		pcie_capability_read_dword(pbridge, PCI_EXP_DEVCAP2, &devcap2);
 
 		if ((flags & PCI_EXP_FLAGS_VERS) < 2 ||
 		    !(devcap2 & PCI_EXP_DEVCAP2_ARI)) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 312599c6b35a..e447976bdd3e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -67,7 +67,8 @@ static struct ch_tc_pedit_fields pedits[] = {
 static struct ch_tc_flower_entry *allocate_flower_entry(void)
 {
 	struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
-	spin_lock_init(&new->lock);
+	if (new)
+		spin_lock_init(&new->lock);
 	return new;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 9dd5ed9a2965..f7fc553356f2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -7309,7 +7309,6 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
 	} else {
 		unsigned int pack_align;
 		unsigned int ingpad, ingpack;
-		unsigned int pcie_cap;
 
 		/* T5 introduced the separation of the Free List Padding and
 		 * Packing Boundaries.  Thus, we can select a smaller Padding
@@ -7334,8 +7333,7 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
 		 * multiple of the Maximum Payload Size.
 		 */
 		pack_align = fl_align;
-		pcie_cap = pci_find_capability(adap->pdev, PCI_CAP_ID_EXP);
-		if (pcie_cap) {
+		if (pci_is_pcie(adap->pdev)) {
 			unsigned int mps, mps_log;
 			u16 devctl;
 
@@ -7343,9 +7341,8 @@ int t4_fixup_host_params(struct adapter *adap, unsigned int page_size,
 			 * [bits 7:5] encodes sizes as powers of 2 starting at
 			 * 128 bytes.
 			 */
-			pci_read_config_word(adap->pdev,
-					     pcie_cap + PCI_EXP_DEVCTL,
-					     &devctl);
+			pcie_capability_read_word(adap->pdev, PCI_EXP_DEVCTL,
+						  &devctl);
 			mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
 			mps = 1 << mps_log;
 			if (mps > pack_align)
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index b7a246b33599..2edb86ec9fe9 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4698,8 +4698,13 @@ int be_update_queues(struct be_adapter *adapter)
 	int status;
 
 	if (netif_running(netdev)) {
+		/* be_tx_timeout() must not run concurrently with this
+		 * function, synchronize with an already-running dev_watchdog
+		 */
+		netif_tx_lock_bh(netdev);
 		/* device cannot transmit now, avoid dev_watchdog timeouts */
 		netif_carrier_off(netdev);
+		netif_tx_unlock_bh(netdev);
 
 		be_close(netdev);
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 8ad5292eebbe..75329ab775a6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -43,7 +43,7 @@ enum HCLGE_MBX_OPCODE {
 	HCLGE_MBX_GET_QID_IN_PF,	/* (VF -> PF) get queue id in pf */
 	HCLGE_MBX_LINK_STAT_MODE,	/* (PF -> VF) link mode has changed */
 	HCLGE_MBX_GET_LINK_MODE,	/* (VF -> PF) get the link mode of pf */
-	HLCGE_MBX_PUSH_VLAN_INFO,	/* (PF -> VF) push port base vlan */
+	HCLGE_MBX_PUSH_VLAN_INFO,	/* (PF -> VF) push port base vlan */
 	HCLGE_MBX_GET_MEDIA_TYPE,       /* (VF -> PF) get media type */
 
 	HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index a38ac7cfe16b..690b9990215c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -304,7 +304,7 @@ int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
 	memcpy(&msg_data[6], &vlan_tag, sizeof(u16));
 
 	return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
-				  HLCGE_MBX_PUSH_VLAN_INFO, vfid);
+				  HCLGE_MBX_PUSH_VLAN_INFO, vfid);
 }
 
 static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index f60b80bd605e..6a96987bd8f0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -204,7 +204,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 		case HCLGE_MBX_LINK_STAT_CHANGE:
 		case HCLGE_MBX_ASSERTING_RESET:
 		case HCLGE_MBX_LINK_STAT_MODE:
-		case HLCGE_MBX_PUSH_VLAN_INFO:
+		case HCLGE_MBX_PUSH_VLAN_INFO:
 			/* set this mbx event as pending. This is required as we
 			 * might loose interrupt event when mbx task is busy
 			 * handling. This shall be cleared when mbx task just
@@ -307,7 +307,7 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 			hclgevf_reset_task_schedule(hdev);
 
 			break;
-		case HLCGE_MBX_PUSH_VLAN_INFO:
+		case HCLGE_MBX_PUSH_VLAN_INFO:
 			state = le16_to_cpu(msg_q[1]);
 			vlan_info = &msg_q[1];
 			hclgevf_update_port_base_vlan_info(hdev, state,
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 93f3b4e6185b..aa9323e55406 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3912,13 +3912,11 @@ void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
 {
 	struct igc_adapter *adapter = hw->back;
-	u16 cap_offset;
 
-	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
-	if (!cap_offset)
+	if (!pci_is_pcie(adapter->pdev))
 		return -IGC_ERR_CONFIG;
 
-	pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+	pcie_capability_read_word(adapter->pdev, reg, value);
 
 	return IGC_SUCCESS;
 }
@@ -3926,13 +3924,11 @@ s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
 {
 	struct igc_adapter *adapter = hw->back;
-	u16 cap_offset;
 
-	cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
-	if (!cap_offset)
+	if (!pci_is_pcie(adapter->pdev))
 		return -IGC_ERR_CONFIG;
 
-	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+	pcie_capability_write_word(adapter->pdev, reg, *value);
 
 	return IGC_SUCCESS;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7245d287633d..7f747cb1a4f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -735,8 +735,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
 		list_add(&indr_priv->list,
 			 &rpriv->uplink_priv.tc_indr_block_priv_list);
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       mlx5e_rep_indr_setup_block_cb,
+		block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb,
 					       indr_priv, indr_priv,
 					       mlx5e_rep_indr_tc_block_unbind);
 		if (IS_ERR(block_cb)) {
@@ -753,7 +752,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
 		if (!indr_priv)
 			return -ENOENT;
 
-		block_cb = flow_block_cb_lookup(f,
+		block_cb = flow_block_cb_lookup(f->block,
 						mlx5e_rep_indr_setup_block_cb,
 						indr_priv);
 		if (!block_cb)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4d34d42b3b0e..650638152bbc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1604,14 +1604,14 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 	bool register_block = false;
 	int err;
 
-	block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
+	block_cb = flow_block_cb_lookup(f->block,
+					mlxsw_sp_setup_tc_block_cb_flower,
 					mlxsw_sp);
 	if (!block_cb) {
 		acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
 		if (!acl_block)
 			return -ENOMEM;
-		block_cb = flow_block_cb_alloc(f->net,
-					       mlxsw_sp_setup_tc_block_cb_flower,
+		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
 					       mlxsw_sp, acl_block,
 					       mlxsw_sp_tc_block_flower_release);
 		if (IS_ERR(block_cb)) {
@@ -1657,7 +1657,8 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
 	struct flow_block_cb *block_cb;
 	int err;
 
-	block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
+	block_cb = flow_block_cb_lookup(f->block,
+					mlxsw_sp_setup_tc_block_cb_flower,
 					mlxsw_sp);
 	if (!block_cb)
 		return;
@@ -1680,7 +1681,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 				   struct flow_block_offload *f)
 {
 	struct flow_block_cb *block_cb;
-	tc_setup_cb_t *cb;
+	flow_setup_cb_t *cb;
 	bool ingress;
 	int err;
 
@@ -1702,7 +1703,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 					  &mlxsw_sp_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, mlxsw_sp_port,
+		block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port,
 					       mlxsw_sp_port, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
@@ -1718,7 +1719,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 	case FLOW_BLOCK_UNBIND:
 		mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
 						      f, ingress);
-		block_cb = flow_block_cb_lookup(f, cb, mlxsw_sp_port);
+		block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 7aaddc09c185..59487d446a09 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -316,15 +316,14 @@ int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port,
 	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		return -EOPNOTSUPP;
 
-	block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower,
-					port);
+	block_cb = flow_block_cb_lookup(f->block,
+					ocelot_setup_tc_block_cb_flower, port);
 	if (!block_cb) {
 		port_block = ocelot_port_block_create(port);
 		if (!port_block)
 			return -ENOMEM;
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       ocelot_setup_tc_block_cb_flower,
+		block_cb = flow_block_cb_alloc(ocelot_setup_tc_block_cb_flower,
 					       port, port_block,
 					       ocelot_tc_block_unbind);
 		if (IS_ERR(block_cb)) {
@@ -351,8 +350,8 @@ void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port,
 {
 	struct flow_block_cb *block_cb;
 
-	block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower,
-					port);
+	block_cb = flow_block_cb_lookup(f->block,
+					ocelot_setup_tc_block_cb_flower, port);
 	if (!block_cb)
 		return;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
index 9e6464ffae5d..16a6db71ca5e 100644
--- a/drivers/net/ethernet/mscc/ocelot_tc.c
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -134,7 +134,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port,
 				 struct flow_block_offload *f)
 {
 	struct flow_block_cb *block_cb;
-	tc_setup_cb_t *cb;
+	flow_setup_cb_t *cb;
 	int err;
 
 	netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n",
@@ -156,7 +156,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port,
 		if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, port, port, NULL);
+		block_cb = flow_block_cb_alloc(cb, port, port, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
 
@@ -169,7 +169,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port,
 		list_add_tail(&block_cb->driver_list, f->driver_block_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, cb, port);
+		block_cb = flow_block_cb_lookup(f->block, cb, port);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index faa8ba012a37..e209f150c5f2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1318,8 +1318,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 					  &nfp_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       nfp_flower_setup_tc_block_cb,
+		block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb,
 					       repr, repr, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
@@ -1328,7 +1327,8 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 		list_add_tail(&block_cb->driver_list, &nfp_block_cb_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, nfp_flower_setup_tc_block_cb,
+		block_cb = flow_block_cb_lookup(f->block,
+						nfp_flower_setup_tc_block_cb,
 						repr);
 		if (!block_cb)
 			return -ENOENT;
@@ -1424,8 +1424,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
 		cb_priv->app = app;
 		list_add(&cb_priv->list, &priv->indr_block_cb_priv);
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       nfp_flower_setup_indr_block_cb,
+		block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb,
 					       cb_priv, cb_priv,
 					       nfp_flower_setup_indr_tc_release);
 		if (IS_ERR(block_cb)) {
@@ -1442,7 +1441,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
 		if (!cb_priv)
 			return -ENOENT;
 
-		block_cb = flow_block_cb_lookup(f,
+		block_cb = flow_block_cb_lookup(f->block,
 						nfp_flower_setup_indr_block_cb,
 						cb_priv);
 		if (!block_cb)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index f900fde448db..17c64e43d6c3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -530,9 +530,8 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
 	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1);
 
 	/* Check atomic operations support in PCI configuration space. */
-	pci_read_config_dword(cdev->pdev,
-			      cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2,
-			      &pci_status_control);
+	pcie_capability_read_dword(cdev->pdev, PCI_EXP_DEVCTL2,
+				   &pci_status_control);
 
 	if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
 		SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 0637c6752a78..6272115b2848 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3251,9 +3251,9 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
 
 	ret = phy_read_paged(tp->phydev, 0x0a46, 0x13);
 	if (ret & BIT(8))
-		phy_modify_paged(tp->phydev, 0x0c41, 0x12, 0, BIT(1));
+		phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1));
 	else
-		phy_modify_paged(tp->phydev, 0x0c41, 0x12, BIT(1), 0);
+		phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0);
 
 	/* Enable PHY auto speed down */
 	phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index afdcc5664ea6..3544e1991579 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -836,7 +836,6 @@ int netvsc_recv_callback(struct net_device *net,
 
 	if (unlikely(!skb)) {
 		++net_device_ctx->eth_stats.rx_no_memory;
-		rcu_read_unlock();
 		return NVSP_STAT_FAIL;
 	}
 
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 2d816aadea79..e36c04c26866 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -517,7 +517,7 @@ static int sfp_hwmon_read_sensor(struct sfp *sfp, int reg, long *value)
 
 static void sfp_hwmon_to_rx_power(long *value)
 {
-	*value = DIV_ROUND_CLOSEST(*value, 100);
+	*value = DIV_ROUND_CLOSEST(*value, 10);
 }
 
 static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset,
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 54edf8956a25..6e84328bdd40 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -165,23 +165,29 @@ static int vrf_ip6_local_out(struct net *net, struct sock *sk,
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 					   struct net_device *dev)
 {
-	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct ipv6hdr *iph;
 	struct net *net = dev_net(skb->dev);
-	struct flowi6 fl6 = {
-		/* needed to match OIF rule */
-		.flowi6_oif = dev->ifindex,
-		.flowi6_iif = LOOPBACK_IFINDEX,
-		.daddr = iph->daddr,
-		.saddr = iph->saddr,
-		.flowlabel = ip6_flowinfo(iph),
-		.flowi6_mark = skb->mark,
-		.flowi6_proto = iph->nexthdr,
-		.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
-	};
+	struct flowi6 fl6;
 	int ret = NET_XMIT_DROP;
 	struct dst_entry *dst;
 	struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
 
+	if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
+		goto err;
+
+	iph = ipv6_hdr(skb);
+
+	memset(&fl6, 0, sizeof(fl6));
+	/* needed to match OIF rule */
+	fl6.flowi6_oif = dev->ifindex;
+	fl6.flowi6_iif = LOOPBACK_IFINDEX;
+	fl6.daddr = iph->daddr;
+	fl6.saddr = iph->saddr;
+	fl6.flowlabel = ip6_flowinfo(iph);
+	fl6.flowi6_mark = skb->mark;
+	fl6.flowi6_proto = iph->nexthdr;
+	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (dst == dst_null)
 		goto err;
@@ -237,21 +243,27 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk,
 static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
 					   struct net_device *vrf_dev)
 {
-	struct iphdr *ip4h = ip_hdr(skb);
+	struct iphdr *ip4h;
 	int ret = NET_XMIT_DROP;
-	struct flowi4 fl4 = {
-		/* needed to match OIF rule */
-		.flowi4_oif = vrf_dev->ifindex,
-		.flowi4_iif = LOOPBACK_IFINDEX,
-		.flowi4_tos = RT_TOS(ip4h->tos),
-		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
-		.flowi4_proto = ip4h->protocol,
-		.daddr = ip4h->daddr,
-		.saddr = ip4h->saddr,
-	};
+	struct flowi4 fl4;
 	struct net *net = dev_net(vrf_dev);
 	struct rtable *rt;
 
+	if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
+		goto err;
+
+	ip4h = ip_hdr(skb);
+
+	memset(&fl4, 0, sizeof(fl4));
+	/* needed to match OIF rule */
+	fl4.flowi4_oif = vrf_dev->ifindex;
+	fl4.flowi4_iif = LOOPBACK_IFINDEX;
+	fl4.flowi4_tos = RT_TOS(ip4h->tos);
+	fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
+	fl4.flowi4_proto = ip4h->protocol;
+	fl4.daddr = ip4h->daddr;
+	fl4.saddr = ip4h->saddr;
+
 	rt = ip_route_output_flow(net, &fl4, NULL);
 	if (IS_ERR(rt))
 		goto err;
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index d436cc51dfd1..2fb4258941a5 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -177,6 +177,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = {
 		.info.subcmd = QCA_NL80211_VENDOR_SUBCMD_DMG_RF_GET_SECTOR_CFG,
 		.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
+		.policy = wil_rf_sector_policy,
 		.doit = wil_rf_sector_get_cfg
 	},
 	{
@@ -184,6 +185,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = {
 		.info.subcmd = QCA_NL80211_VENDOR_SUBCMD_DMG_RF_SET_SECTOR_CFG,
 		.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
+		.policy = wil_rf_sector_policy,
 		.doit = wil_rf_sector_set_cfg
 	},
 	{
@@ -192,6 +194,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = {
 			QCA_NL80211_VENDOR_SUBCMD_DMG_RF_GET_SELECTED_SECTOR,
 		.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
+		.policy = wil_rf_sector_policy,
 		.doit = wil_rf_sector_get_selected
 	},
 	{
@@ -200,6 +203,7 @@ static const struct wiphy_vendor_command wil_nl80211_vendor_commands[] = {
 			QCA_NL80211_VENDOR_SUBCMD_DMG_RF_SET_SELECTED_SECTOR,
 		.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
+		.policy = wil_rf_sector_policy,
 		.doit = wil_rf_sector_set_selected
 	},
 };
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c
index f6500899fc14..d07e7c7355d9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c
@@ -112,6 +112,7 @@ const struct wiphy_vendor_command brcmf_vendor_cmds[] = {
 		},
 		.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
 			 WIPHY_VENDOR_CMD_NEED_NETDEV,
+		.policy = VENDOR_CMD_RAW_DATA,
 		.doit = brcmf_cfg80211_vndr_cmds_dcmd_handler
 	},
 };
diff --git a/drivers/net/wireless/ti/wlcore/vendor_cmd.c b/drivers/net/wireless/ti/wlcore/vendor_cmd.c
index 5cf0b32c413b..e1bd344c4ebc 100644
--- a/drivers/net/wireless/ti/wlcore/vendor_cmd.c
+++ b/drivers/net/wireless/ti/wlcore/vendor_cmd.c
@@ -163,6 +163,7 @@ static const struct wiphy_vendor_command wlcore_vendor_commands[] = {
 		.flags = WIPHY_VENDOR_CMD_NEED_NETDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
 		.doit = wlcore_vendor_cmd_smart_config_start,
+		.policy = wlcore_vendor_attr_policy,
 	},
 	{
 		.info = {
@@ -172,6 +173,7 @@ static const struct wiphy_vendor_command wlcore_vendor_commands[] = {
 		.flags = WIPHY_VENDOR_CMD_NEED_NETDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
 		.doit = wlcore_vendor_cmd_smart_config_stop,
+		.policy = wlcore_vendor_attr_policy,
 	},
 	{
 		.info = {
@@ -181,6 +183,7 @@ static const struct wiphy_vendor_command wlcore_vendor_commands[] = {
 		.flags = WIPHY_VENDOR_CMD_NEED_NETDEV |
 			 WIPHY_VENDOR_CMD_NEED_RUNNING,
 		.doit = wlcore_vendor_cmd_smart_config_set_group_key,
+		.policy = wlcore_vendor_attr_policy,
 	},
 };
 
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
index c99eed87382a..df16c755b4da 100644
--- a/drivers/ntb/Kconfig
+++ b/drivers/ntb/Kconfig
@@ -13,6 +13,17 @@ menuconfig NTB
 
 if NTB
 
+config NTB_MSI
+	bool "MSI Interrupt Support"
+	depends on PCI_MSI
+	help
+	 Support using MSI interrupt forwarding instead of (or in addition to)
+	 hardware doorbells. MSI interrupts typically offer lower latency
+	 than doorbells and more MSI interrupts can be made available to
+	 clients. However this requires an extra memory window and support
+	 in the hardware driver for creating the MSI interrupts.
+
+	 If unsure, say N.
 source "drivers/ntb/hw/Kconfig"
 
 source "drivers/ntb/test/Kconfig"
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
index 5c64438d5b3f..3a6fa181ff99 100644
--- a/drivers/ntb/Makefile
+++ b/drivers/ntb/Makefile
@@ -1,3 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_NTB) += ntb.o hw/ test/
 obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
+
+ntb-y			:= core.o
+ntb-$(CONFIG_NTB_MSI)	+= msi.o
diff --git a/drivers/ntb/ntb.c b/drivers/ntb/core.c
index 2581ab724c34..2581ab724c34 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/core.c
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index efb214fc545a..2859cc99b73e 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -160,8 +160,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 		}
 
 		/* set and verify setting the limit */
-		write64(limit, mmio + limit_reg);
-		reg_val = read64(mmio + limit_reg);
+		write64(limit, peer_mmio + limit_reg);
+		reg_val = read64(peer_mmio + limit_reg);
 		if (reg_val != limit) {
 			write64(base_addr, mmio + limit_reg);
 			write64(0, peer_mmio + xlat_reg);
@@ -183,8 +183,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 		}
 
 		/* set and verify setting the limit */
-		writel(limit, mmio + limit_reg);
-		reg_val = readl(mmio + limit_reg);
+		writel(limit, peer_mmio + limit_reg);
+		reg_val = readl(peer_mmio + limit_reg);
 		if (reg_val != limit) {
 			writel(base_addr, mmio + limit_reg);
 			writel(0, peer_mmio + xlat_reg);
@@ -333,7 +333,7 @@ static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
 	if (db_vector < 0 || db_vector > ndev->db_count)
 		return 0;
 
-	return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+	return ntb_ndev(ntb)->db_valid_mask & (1ULL << db_vector);
 }
 
 static u64 amd_ntb_db_read(struct ntb_dev *ntb)
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c
index f475b56a3f49..c3397160db7f 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen3.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c
@@ -532,9 +532,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 	return 0;
 }
 
-int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
-				resource_size_t *db_size,
-				u64 *db_data, int db_bit)
+static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+				   resource_size_t *db_size,
+				   u64 *db_data, int db_bit)
 {
 	phys_addr_t db_addr_base;
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index db4967748e4d..f4959458d909 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -86,7 +86,8 @@ struct switchtec_ntb {
 	bool link_is_up;
 	enum ntb_speed link_speed;
 	enum ntb_width link_width;
-	struct work_struct link_reinit_work;
+	struct work_struct check_link_status_work;
+	bool link_force_down;
 };
 
 static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
@@ -485,33 +486,11 @@ enum switchtec_msg {
 
 static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
 
-static void link_reinit_work(struct work_struct *work)
-{
-	struct switchtec_ntb *sndev;
-
-	sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
-
-	switchtec_ntb_reinit_peer(sndev);
-}
-
-static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
-				     enum switchtec_msg msg)
+static void switchtec_ntb_link_status_update(struct switchtec_ntb *sndev)
 {
 	int link_sta;
 	int old = sndev->link_is_up;
 
-	if (msg == MSG_LINK_FORCE_DOWN) {
-		schedule_work(&sndev->link_reinit_work);
-
-		if (sndev->link_is_up) {
-			sndev->link_is_up = 0;
-			ntb_link_event(&sndev->ntb);
-			dev_info(&sndev->stdev->dev, "ntb link forced down\n");
-		}
-
-		return;
-	}
-
 	link_sta = sndev->self_shared->link_sta;
 	if (link_sta) {
 		u64 peer = ioread64(&sndev->peer_shared->magic);
@@ -536,6 +515,38 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
 	}
 }
 
+static void check_link_status_work(struct work_struct *work)
+{
+	struct switchtec_ntb *sndev;
+
+	sndev = container_of(work, struct switchtec_ntb,
+			     check_link_status_work);
+
+	if (sndev->link_force_down) {
+		sndev->link_force_down = false;
+		switchtec_ntb_reinit_peer(sndev);
+
+		if (sndev->link_is_up) {
+			sndev->link_is_up = 0;
+			ntb_link_event(&sndev->ntb);
+			dev_info(&sndev->stdev->dev, "ntb link forced down\n");
+		}
+
+		return;
+	}
+
+	switchtec_ntb_link_status_update(sndev);
+}
+
+static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
+				      enum switchtec_msg msg)
+{
+	if (msg == MSG_LINK_FORCE_DOWN)
+		sndev->link_force_down = true;
+
+	schedule_work(&sndev->check_link_status_work);
+}
+
 static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
 {
 	struct switchtec_ntb *sndev = stdev->sndev;
@@ -568,7 +579,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
 	sndev->self_shared->link_sta = 1;
 	switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
 
-	switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+	switchtec_ntb_link_status_update(sndev);
 
 	return 0;
 }
@@ -582,7 +593,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
 	sndev->self_shared->link_sta = 0;
 	switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
 
-	switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+	switchtec_ntb_link_status_update(sndev);
 
 	return 0;
 }
@@ -835,7 +846,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
 	sndev->ntb.topo = NTB_TOPO_SWITCH;
 	sndev->ntb.ops = &switchtec_ntb_ops;
 
-	INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
+	INIT_WORK(&sndev->check_link_status_work, check_link_status_work);
+	sndev->link_force_down = false;
 
 	sndev->self_partition = sndev->stdev->partition;
 
@@ -872,7 +884,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
 		}
 
 		sndev->peer_partition = ffs(tpart_vec) - 1;
-		if (!(part_map & (1 << sndev->peer_partition))) {
+		if (!(part_map & (1ULL << sndev->peer_partition))) {
 			dev_err(&sndev->stdev->dev,
 				"ntb target partition is not NT partition\n");
 			return -ENODEV;
@@ -1448,10 +1460,16 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
 
 static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
 {
-	dev_info(&sndev->stdev->dev, "peer reinitialized\n");
-	switchtec_ntb_deinit_shared_mw(sndev);
-	switchtec_ntb_init_mw(sndev);
-	return switchtec_ntb_init_shared_mw(sndev);
+	int rc;
+
+	if (crosslink_is_enabled(sndev))
+		return 0;
+
+	dev_info(&sndev->stdev->dev, "reinitialize shared memory window\n");
+	rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0,
+				 sndev->self_partition,
+				 sndev->self_shared_dma);
+	return rc;
 }
 
 static int switchtec_ntb_add(struct device *dev,
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
new file mode 100644
index 000000000000..9dddf133658f
--- /dev/null
+++ b/drivers/ntb/msi.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/ntb.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("NTB MSI Interrupt Library");
+
+struct ntb_msi {
+	u64 base_addr;
+	u64 end_addr;
+
+	void (*desc_changed)(void *ctx);
+
+	u32 __iomem *peer_mws[];
+};
+
+/**
+ * ntb_msi_init() - Initialize the MSI context
+ * @ntb:	NTB device context
+ * @desc_changed: Optional callback (may be NULL); the write_msi_msg hook
+ *		calls it with ntb->ctx after refreshing an MSI descriptor
+ *
+ * This function must be called before any other ntb_msi function. It
+ * initializes the context for MSI operations and maps the peer memory
+ * windows, reserving the last N outbound memory windows (where N is the
+ * number of peers).
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_init(struct ntb_dev *ntb,
+		 void (*desc_changed)(void *ctx))
+{
+	phys_addr_t mw_phys_addr;
+	resource_size_t mw_size;
+	size_t struct_size;
+	int peer_widx;
+	int peers;
+	int ret;
+	int i;
+
+	peers = ntb_peer_port_count(ntb);
+	if (peers <= 0)
+		return -EINVAL;
+
+	struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers;
+
+	ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+	if (!ntb->msi)
+		return -ENOMEM;
+
+	ntb->msi->desc_changed = desc_changed;
+
+	for (i = 0; i < peers; i++) {
+		peer_widx = ntb_peer_mw_count(ntb) - 1 - i;
+
+		ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr,
+					   &mw_size);
+		if (ret)
+			goto unroll;
+
+		ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr,
+						     mw_size);
+		if (!ntb->msi->peer_mws[i]) {
+			ret = -EFAULT;
+			goto unroll;
+		}
+	}
+
+	return 0;
+
+unroll:
+	for (i = 0; i < peers; i++)
+		if (ntb->msi->peer_mws[i])
+			devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]);
+
+	devm_kfree(&ntb->dev, ntb->msi);
+	ntb->msi = NULL;
+	return ret;
+}
+EXPORT_SYMBOL(ntb_msi_init);
+
+/**
+ * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows
+ * @ntb:	NTB device context
+ *
+ * This function sets up the required inbound memory windows. It should be
+ * called from a work function after a link up event.
+ *
+ * Over the entire network, this function reserves the last N
+ * inbound memory windows for each peer (where N is the number of peers).
+ *
+ * ntb_msi_init() must be called before this function.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+	struct msi_desc *desc;
+	u64 addr;
+	int peer, peer_widx;
+	resource_size_t addr_align, size_align, size_max;
+	resource_size_t mw_size = SZ_32K;
+	resource_size_t mw_min_size = mw_size;
+	int i;
+	int ret;
+
+	if (!ntb->msi)
+		return -EINVAL;
+
+	desc = first_msi_entry(&ntb->pdev->dev);
+	addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
+
+	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0)
+			return peer_widx;
+
+		ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align,
+				       NULL, NULL);
+		if (ret)
+			return ret;
+
+		addr &= ~(addr_align - 1);
+	}
+
+	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0) {
+			ret = peer_widx;
+			goto error_out;
+		}
+
+		ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL,
+				       &size_align, &size_max);
+		if (ret)
+			goto error_out;
+
+		mw_size = round_up(mw_size, size_align);
+		mw_size = max(mw_size, size_max);
+		if (mw_size < mw_min_size)
+			mw_min_size = mw_size;
+
+		ret = ntb_mw_set_trans(ntb, peer, peer_widx, addr, mw_size);
+		if (ret)
+			goto error_out;
+	}
+
+	ntb->msi->base_addr = addr;
+	ntb->msi->end_addr = addr + mw_min_size;
+
+	return 0;
+
+error_out:
+	/* Undo only peers 0..peer-1; look up each one's own window index */
+	for (i = 0; i < peer; i++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, i);
+		if (peer_widx < 0)
+			continue;
+
+		ntb_mw_clear_trans(ntb, i, peer_widx);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(ntb_msi_setup_mws);
+
+/**
+ * ntb_msi_clear_mws() - Clear the MSI inbound memory window of each peer
+ * @ntb:	NTB device context
+ *
+ * Clears each peer's highest-indexed window, as set by ntb_msi_setup_mws().
+ */
+void ntb_msi_clear_mws(struct ntb_dev *ntb)
+{
+	int peer;
+	int peer_widx;
+
+	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0)
+			continue;
+
+		ntb_mw_clear_trans(ntb, peer, peer_widx);
+	}
+}
+EXPORT_SYMBOL(ntb_msi_clear_mws);
+
+struct ntb_msi_devres {
+	struct ntb_dev *ntb;
+	struct msi_desc *entry;
+	struct ntb_msi_desc *msi_desc;
+};
+
+/*
+ * Fill @msi_desc from @entry's MSI message: the address as an offset from
+ * the MSI window base, plus the data word. -EFAULT if outside the window.
+ */
+static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry,
+			    struct ntb_msi_desc *msi_desc)
+{
+	u64 hi = (uint64_t)entry->msg.address_hi << 32;
+	u64 msg_addr = entry->msg.address_lo + hi;
+
+	if (msg_addr >= ntb->msi->base_addr && msg_addr < ntb->msi->end_addr) {
+		msi_desc->addr_offset = msg_addr - ntb->msi->base_addr;
+		msi_desc->data = entry->msg.data;
+		return 0;
+	}
+	dev_warn_once(&ntb->dev,
+		      "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n",
+		      entry->irq, msg_addr, ntb->msi->base_addr,
+		      ntb->msi->end_addr);
+	return -EFAULT;
+}
+
+static void ntb_msi_write_msg(struct msi_desc *entry, void *data)
+{
+	struct ntb_msi_devres *res = data;
+	struct ntb_dev *ntb = res->ntb;
+
+	WARN_ON(ntb_msi_set_desc(ntb, entry, res->msi_desc));
+	if (ntb->msi->desc_changed)	/* notify the client, if it asked */
+		ntb->msi->desc_changed(ntb->ctx);
+}
+
+static void ntbm_msi_callback_release(struct device *dev, void *res)
+{
+	struct msi_desc *entry = ((struct ntb_msi_devres *)res)->entry;
+
+	entry->write_msi_msg = NULL;	/* detach the hook from the MSI entry */
+	entry->write_msi_msg_data = NULL;
+}
+
+/*
+ * Hook ntb_msi_write_msg() into @entry so @msi_desc tracks MSI rewrites.
+ */
+static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry,
+				   struct ntb_msi_desc *msi_desc)
+{
+	struct ntb_msi_devres *res;
+
+	res = devres_alloc(ntbm_msi_callback_release, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	res->ntb = ntb;
+	res->entry = entry;
+	res->msi_desc = msi_desc;
+	devres_add(&ntb->dev, res);
+
+	entry->write_msi_msg = ntb_msi_write_msg;
+	entry->write_msi_msg_data = res;
+	return 0;
+}
+
+/**
+ * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt
+ * @ntb:	NTB device context
+ * @handler:	Function to be called when the IRQ occurs
+ * @thread_fn:  Function to be called in a threaded interrupt context. NULL
+ *              for clients which handle everything in @handler
+ * @name:       An ascii name for the claiming device, dev_name(dev) if NULL
+ * @dev_id:     A cookie passed back to the handler function
+ * @msi_desc:   MSI descriptor data which triggers the interrupt
+ *
+ * This function assigns an interrupt handler to an unused
+ * MSI interrupt and returns the descriptor used to trigger
+ * it. The descriptor can then be sent to a peer to trigger
+ * the interrupt.
+ *
+ * The interrupt resource is managed with devres so it will
+ * be automatically freed when the NTB device is torn down.
+ *
+ * If an IRQ allocated with this function needs to be freed
+ * separately, ntbm_msi_free_irq() must be used.
+ *
+ * Return: IRQ number assigned on success, otherwise a negative error number.
+ */
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+				  irq_handler_t thread_fn,
+				  const char *name, void *dev_id,
+				  struct ntb_msi_desc *msi_desc)
+{
+	struct msi_desc *entry;
+	struct irq_desc *desc;
+	int ret;
+
+	if (!ntb->msi)
+		return -EINVAL;
+
+	for_each_pci_msi_entry(entry, ntb->pdev) {
+		desc = irq_to_desc(entry->irq);
+		if (!desc || desc->action)	/* no descriptor, or in use */
+			continue;
+
+		ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
+						thread_fn, 0, name, dev_id);
+		if (ret)
+			continue;
+		/* vectors whose address is outside the window are skipped */
+		if (ntb_msi_set_desc(ntb, entry, msi_desc)) {
+			devm_free_irq(&ntb->dev, entry->irq, dev_id);
+			continue;
+		}
+
+		ret = ntbm_msi_setup_callback(ntb, entry, msi_desc);
+		if (ret) {
+			devm_free_irq(&ntb->dev, entry->irq, dev_id);
+			return ret;
+		}
+
+		return entry->irq;
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
+
+static int ntbm_msi_callback_match(struct device *dev, void *res, void *data)
+{
+	const struct ntb_msi_devres *cand = res;
+
+	/* match the devres that belongs to this NTB device and MSI entry */
+	return cand->ntb == dev_ntb(dev) && cand->entry == data;
+}
+
+/**
+ * ntbm_msi_free_irq() - free an interrupt
+ * @ntb:	NTB device context
+ * @irq:	Interrupt line to free
+ * @dev_id:	Device identity to free
+ *
+ * This function should be used to manually free IRQs allocated with
+ * ntbm_msi_request_[threaded_]irq().
+ */
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id)
+{
+	struct msi_desc *entry = irq_get_msi_desc(irq);
+
+	entry->write_msi_msg = NULL;
+	entry->write_msi_msg_data = NULL;
+	/* drop the devres entry that tracked the write-message hook */
+	WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release,
+			       ntbm_msi_callback_match, entry));
+
+	devm_free_irq(&ntb->dev, irq, dev_id);
+}
+EXPORT_SYMBOL(ntbm_msi_free_irq);
+
+/**
+ * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer
+ * @ntb:	NTB device context
+ * @peer:	Peer index
+ * @desc:	MSI descriptor data which triggers the interrupt
+ *
+ * Raise an interrupt on @peer by writing @desc->data at @desc->addr_offset
+ * within that peer's MSI window. The descriptor must have been obtained
+ * from the peer by some other means.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+			 struct ntb_msi_desc *desc)
+{
+	int word;
+
+	if (ntb->msi) {
+		/* convert the stored offset into an element index */
+		word = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]);
+		iowrite32(desc->data, &ntb->msi->peer_mws[peer][word]);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ntb_msi_peer_trigger);
+
+/**
+ * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt
+ * @ntb:	NTB device context
+ * @peer:	Peer index
+ * @desc:	MSI descriptor data which triggers the interrupt
+ * @msi_addr:   Physical address to trigger the interrupt (may be NULL)
+ *
+ * Lets a DMA engine raise the interrupt, for example to have the peer
+ * process data right after it was sent: writing @desc->data to the
+ * address returned in @msi_addr triggers it. With a NULL @msi_addr only
+ * the memory window lookup is performed.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+		      struct ntb_msi_desc *desc,
+		      phys_addr_t *msi_addr)
+{
+	phys_addr_t window_base;
+	int widx;
+	int rc;
+
+	/* the window index counts down from the last peer memory window */
+	widx = ntb_peer_mw_count(ntb) - 1 - peer;
+
+	rc = ntb_peer_mw_get_addr(ntb, widx, &window_base, NULL);
+	if (!rc && msi_addr)
+		*msi_addr = window_base + desc->addr_offset;
+
+	return rc;
+}
+EXPORT_SYMBOL(ntb_msi_peer_addr);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index d4f39ba1d976..40c90ca10729 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -93,6 +93,12 @@ static bool use_dma;
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
 
+static bool use_msi;	/* a module parameter only with CONFIG_NTB_MSI */
+#ifdef CONFIG_NTB_MSI
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
+#endif
+
 static struct dentry *nt_debugfs_dir;
 
 /* Only two-ports NTB devices are supported */
@@ -188,6 +194,11 @@ struct ntb_transport_qp {
 	u64 tx_err_no_buf;
 	u64 tx_memcpy;
 	u64 tx_async;
+
+	bool use_msi;
+	int msi_irq;
+	struct ntb_msi_desc msi_desc;
+	struct ntb_msi_desc peer_msi_desc;
 };
 
 struct ntb_transport_mw {
@@ -221,6 +232,10 @@ struct ntb_transport_ctx {
 	u64 qp_bitmap;
 	u64 qp_bitmap_free;
 
+	bool use_msi;
+	unsigned int msi_spad_offset;
+	u64 msi_db_mask;
+
 	bool link_is_up;
 	struct delayed_work link_work;
 	struct work_struct link_cleanup;
@@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
 	return 0;
 }
 
+/* MSI handler of a queue pair: defer to the qp's rxc_db_work tasklet. */
+static irqreturn_t ntb_transport_isr(int irq, void *dev)
+{
+	struct ntb_transport_qp *queue = dev;
+
+	tasklet_schedule(&queue->rxc_db_work);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Fetch the peer's MSI descriptor for @qp_num from its scratchpad pair and
+ * switch the queue to MSI signalling once a non-zero offset is published.
+ */
+static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
+					    unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	struct ntb_msi_desc *peer_desc = &qp->peer_msi_desc;
+	int spad = qp_num * 2 + nt->msi_spad_offset;
+
+	if (!nt->use_msi || spad >= ntb_spad_count(nt->ndev))
+		return;
+
+	peer_desc->addr_offset = ntb_peer_spad_read(qp->ndev, PIDX, spad);
+	peer_desc->data = ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
+
+	dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
+		qp_num, peer_desc->addr_offset, peer_desc->data);
+
+	if (!peer_desc->addr_offset)
+		return;
+	qp->use_msi = true;
+	dev_info(&qp->ndev->pdev->dev,
+		 "Using MSI interrupts for QP%d\n", qp_num);
+}
+
+/* Request an MSI vector for @qp_num (once) and publish it in the spads. */
+static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
+				       unsigned int qp_num)
+{
+	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+	int spad = qp_num * 2 + nt->msi_spad_offset;
+	int rc;
+
+	if (!nt->use_msi)
+		return;
+
+	if (spad >= ntb_spad_count(nt->ndev)) {
+		dev_warn_once(&qp->ndev->pdev->dev,
+			      "Not enough SPADS to use MSI interrupts\n");
+		return;
+	}
+
+	ntb_spad_write(qp->ndev, spad, 0);
+	ntb_spad_write(qp->ndev, spad + 1, 0);
+
+	if (!qp->msi_irq) {
+		rc = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
+					  KBUILD_MODNAME, qp, &qp->msi_desc);
+		if (rc < 0) {
+			dev_warn(&qp->ndev->pdev->dev,
+				 "Unable to allocate MSI interrupt for qp%d\n",
+				 qp_num);
+			return;
+		}
+		qp->msi_irq = rc;
+	}
+
+	rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
+	if (!rc)
+		rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
+	if (rc)
+		goto err_free_interrupt;
+
+	dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
+		qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
+		qp->msi_desc.data);
+	return;
+
+err_free_interrupt:
+	/* also drops the MSI write hook; 0 lets a later call retry */
+	ntbm_msi_free_irq(nt->ndev, qp->msi_irq, qp);
+	qp->msi_irq = 0;
+}
+
+/* Re-read the peer MSI descriptor of every queue pair. */
+static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
+{
+	int qp_num = 0;
+
+	dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
+	while (qp_num < nt->qp_count)
+		ntb_transport_setup_qp_peer_msi(nt, qp_num++);
+}
+
+/* desc_changed hook: republish local descriptors, then set the MSI doorbell. */
+static void ntb_transport_msi_desc_changed(void *data)
+{
+	struct ntb_transport_ctx *nt = data;
+	int qp_num = 0;
+
+	dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
+	while (qp_num < nt->qp_count)
+		ntb_transport_setup_qp_msi(nt, qp_num++);
+
+	ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
+}
+
 static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 {
 	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work)
 	int rc = 0, i, spad;
 
 	/* send the local info, in the opposite order of the way we read it */
+
+	if (nt->use_msi) {
+		rc = ntb_msi_setup_mws(ndev);
+		if (rc) {
+			dev_warn(&pdev->dev,
+				 "Failed to register MSI memory window: %d\n",
+				 rc);
+			nt->use_msi = false;
+		}
+	}
+
+	for (i = 0; i < nt->qp_count; i++)
+		ntb_transport_setup_qp_msi(nt, i);
+
 	for (i = 0; i < nt->mw_count; i++) {
 		size = nt->mw_vec[i].phys_size;
 
@@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work)
 		struct ntb_transport_qp *qp = &nt->qp_vec[i];
 
 		ntb_transport_setup_qp_mw(nt, i);
+		ntb_transport_setup_qp_peer_msi(nt, i);
 
 		if (qp->client_ready)
 			schedule_delayed_work(&qp->link_work, 0);
@@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 		return -ENOMEM;
 
 	nt->ndev = ndev;
+
+	/*
+	 * If we are using MSI, and have at least one extra memory window,
+	 * we will reserve the last MW for the MSI window.
+	 */
+	if (use_msi && mw_count > 1) {
+		rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
+		if (!rc) {
+			mw_count -= 1;
+			nt->use_msi = true;
+		}
+	}
+
 	spad_count = ntb_spad_count(ndev);
 
 	/* Limit the MW's based on the availability of scratchpads */
@@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
 	nt->mw_count = min(mw_count, max_mw_count_for_spads);
 
+	nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
+
 	nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
 				  GFP_KERNEL, node);
 	if (!nt->mw_vec) {
@@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	qp_bitmap = ntb_db_valid_mask(ndev);
 
 	qp_count = ilog2(qp_bitmap);
+	if (nt->use_msi) {
+		qp_count -= 1;
+		nt->msi_db_mask = 1 << qp_count;
+		ntb_db_clear_mask(ndev, nt->msi_db_mask);
+	}
+
 	if (max_num_clients && max_num_clients < qp_count)
 		qp_count = max_num_clients;
 	else if (nt->mw_count < qp_count)
@@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data,
 
 	iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
 
-	ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+	if (qp->use_msi)
+		ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
+	else
+		ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
 
 	/* The entry length can only be zero if the packet is intended to be a
 	 * "link down" or similar.  Since no payload is being sent in these
@@ -1869,6 +2031,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
 		qp->rx_dma_chan = NULL;
 	}
 
+	qp->tx_mw_dma_addr = 0;
 	if (qp->tx_dma_chan) {
 		qp->tx_mw_dma_addr =
 			dma_map_resource(qp->tx_dma_chan->device->dev,
@@ -2268,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
 	u64 db_bits;
 	unsigned int qp_num;
 
+	if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
+		ntb_transport_msi_peer_desc_changed(nt);
+		ntb_db_clear(nt->ndev, nt->msi_db_mask);
+	}
+
 	db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
 		   ntb_db_vector_mask(nt->ndev, vector));
 
diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
index a8db00a7e087..516b991f33b9 100644
--- a/drivers/ntb/test/Kconfig
+++ b/drivers/ntb/test/Kconfig
@@ -26,3 +26,12 @@ config NTB_PERF
 	 to and from the window without additional software interaction.
 
 	 If unsure, say N.
+
+config NTB_MSI_TEST
+	tristate "NTB MSI Test Client"
+	depends on NTB_MSI
+	help
+	  This tool demonstrates the use of the NTB MSI library to
+	  send MSI interrupts between peers.
+
+	  If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
index cbfd67622ef7..19ed91d8a3b1 100644
--- a/drivers/ntb/test/Makefile
+++ b/drivers/ntb/test/Makefile
@@ -2,3 +2,4 @@
 obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
 obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
 obj-$(CONFIG_NTB_PERF) += ntb_perf.o
+obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o
diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c
new file mode 100644
index 000000000000..99d826ed9c34
--- /dev/null
+++ b/drivers/ntb/test/ntb_msi_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/ntb.h>
+#include <linux/pci.h>
+#include <linux/radix-tree.h>
+#include <linux/workqueue.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("Test for sending MSI interrupts over an NTB memory window");
+
+static int num_irqs = 4;
+module_param(num_irqs, int, 0644);
+MODULE_PARM_DESC(num_irqs, "number of irqs to use");
+
+struct ntb_msit_ctx {
+	struct ntb_dev *ntb;		/* device this context is bound to */
+	struct dentry *dbgfs_dir;	/* per-device debugfs directory */
+	struct work_struct setup_work;	/* scheduled when the link is up */
+
+	struct ntb_msit_isr_ctx {
+		int irq_idx;		/* index of this entry in isr_ctx[] */
+		int irq_num;		/* requested IRQ, 0 until requested */
+		int occurrences;	/* bumped by ntb_msit_isr() */
+		struct ntb_msit_ctx *nm;	/* owning context */
+		struct ntb_msi_desc desc;	/* local MSI descriptor */
+	} *isr_ctx;			/* array of num_irqs entries */
+
+	struct ntb_msit_peer {
+		struct ntb_msit_ctx *nm;	/* owning context */
+		int pidx;		/* index of this entry in peers[] */
+		int num_irqs;		/* IRQ count read from the peer */
+		struct completion init_comp;	/* descriptors were copied */
+		struct ntb_msi_desc *msi_desc;	/* copied peer descriptors */
+	} peers[];			/* one entry per peer port */
+};
+
+static struct dentry *ntb_msit_dbgfs_topdir;
+
+/* Test interrupt handler: count the occurrence for the debugfs counter. */
+static irqreturn_t ntb_msit_isr(int irq, void *dev)
+{
+	struct ntb_msit_isr_ctx *ictx = dev;
+
+	dev_dbg(&ictx->nm->ntb->dev, "Interrupt Occurred: %d",
+		ictx->irq_idx);
+
+	ictx->occurrences += 1;
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Work item run after a link-up event: set up the MSI memory windows,
+ * request the test IRQs and publish their descriptors in the scratchpads.
+ */
+static void ntb_msit_setup_work(struct work_struct *work)
+{
+	struct ntb_msit_ctx *nm = container_of(work, struct ntb_msit_ctx,
+					       setup_work);
+	struct ntb_msit_isr_ctx *ictx;
+	int published = 0;
+	uintptr_t i;
+	int rc;
+
+	rc = ntb_msi_setup_mws(nm->ntb);
+	if (rc) {
+		dev_err(&nm->ntb->dev, "Unable to setup MSI windows: %d\n",
+			rc);
+		return;
+	}
+
+	for (i = 0; i < num_irqs; i++) {
+		ictx = &nm->isr_ctx[i];
+		ictx->irq_idx = i;
+		ictx->nm = nm;
+
+		/* request the vector only once; later runs just republish */
+		if (!ictx->irq_num) {
+			rc = ntbm_msi_request_irq(nm->ntb, ntb_msit_isr,
+						  KBUILD_MODNAME, ictx,
+						  &ictx->desc);
+			if (rc < 0)
+				break;
+			ictx->irq_num = rc;
+		}
+
+		/* spad 0 holds the count; IRQ i uses spads 2i+1 and 2i+2 */
+		if (ntb_spad_write(nm->ntb, 2 * i + 1, ictx->desc.addr_offset))
+			break;
+		if (ntb_spad_write(nm->ntb, 2 * i + 2, ictx->desc.data))
+			break;
+
+		published++;
+	}
+
+	ntb_spad_write(nm->ntb, 0, published);
+	ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
+}
+
+/* desc_changed hook: rewrite all descriptor spads, then set our doorbell. */
+static void ntb_msit_desc_changed(void *ctx)
+{
+	struct ntb_msit_ctx *nm = ctx;
+	struct ntb_msi_desc *desc;
+	int i;
+
+	dev_dbg(&nm->ntb->dev, "MSI Descriptors Changed\n");
+	for (i = 0; i < num_irqs; i++) {
+		desc = &nm->isr_ctx[i].desc;
+		ntb_spad_write(nm->ntb, 2 * i + 1, desc->addr_offset);
+		ntb_spad_write(nm->ntb, 2 * i + 2, desc->data);
+	}
+
+	ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
+}
+
+/* Link event hook: (re)run the setup work whenever the link is up. */
+static void ntb_msit_link_event(void *ctx)
+{
+	struct ntb_msit_ctx *nm = ctx;
+
+	/* link-down events need no action here */
+	if (ntb_link_is_up(nm->ntb, NULL, NULL))
+		schedule_work(&nm->setup_work);
+}
+
+/* Copy a peer's descriptors out of its spads and complete its init_comp. */
+static void ntb_msit_copy_peer_desc(struct ntb_msit_ctx *nm, int peer)
+{
+	struct ntb_msit_peer *p = &nm->peers[peer];
+	struct ntb_msi_desc *d = p->msi_desc;
+	int n = p->num_irqs;
+	int i;
+
+	for (i = 0; i < n; i++) {
+		d[i].addr_offset = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 1);
+		d[i].data = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 2);
+	}
+
+	dev_info(&nm->ntb->dev, "Found %d interrupts on peer %d\n", n, peer);
+
+	complete_all(&p->init_comp);
+}
+
+/* Doorbell hook: reload the descriptors of every peer whose bit is set. */
+static void ntb_msit_db_event(void *ctx, int vec)
+{
+	struct ntb_msit_ctx *nm = ctx;
+	struct ntb_msi_desc *desc;
+	u64 peer_mask = ntb_db_read(nm->ntb);
+	u32 irq_count;
+	int peer;
+
+	ntb_db_clear(nm->ntb, peer_mask);
+
+	/* nm->peers[] only has one slot per peer port: never index past it */
+	for (peer = 0; peer < ntb_peer_port_count(nm->ntb); peer++) {
+		if (!(peer_mask & BIT_ULL(peer)))
+			continue;
+
+		irq_count = ntb_peer_spad_read(nm->ntb, peer, 0);
+		if (irq_count == -1)	/* peer has not published yet */
+			continue;
+
+		desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC);
+		if (!desc)
+			continue;
+		kfree(nm->peers[peer].msi_desc);
+		nm->peers[peer].msi_desc = desc;
+		nm->peers[peer].num_irqs = irq_count;
+		ntb_msit_copy_peer_desc(nm, peer);
+	}
+}
+
+static const struct ntb_ctx_ops ntb_msit_ops = {
+	.link_event = ntb_msit_link_event,	/* schedules setup_work */
+	.db_event = ntb_msit_db_event,		/* reloads peer descriptors */
+};
+
+/* debugfs "trigger" write: fire interrupt number @idx on this peer. */
+static int ntb_msit_dbgfs_trigger(void *data, u64 idx)
+{
+	struct ntb_msit_peer *p = data;
+	struct ntb_dev *ntb = p->nm->ntb;
+
+	if (idx >= p->num_irqs)
+		return -EINVAL;
+
+	dev_dbg(&ntb->dev, "trigger irq %llu on peer %u\n", idx, p->pidx);
+
+	return ntb_msi_peer_trigger(ntb, p->pidx, &p->msi_desc[idx]);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_trigger_fops, NULL,
+			 ntb_msit_dbgfs_trigger, "%llu\n");
+
+/* debugfs "port" read for a peer: report that peer's port number. */
+static int ntb_msit_dbgfs_port_get(void *data, u64 *port)
+{
+	const struct ntb_msit_peer *p = data;
+
+	*port = ntb_peer_port_number(p->nm->ntb, p->pidx);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_port_fops, ntb_msit_dbgfs_port_get,
+			 NULL, "%llu\n");
+
+/* debugfs "count" read: number of interrupts last read from this peer. */
+static int ntb_msit_dbgfs_count_get(void *data, u64 *count)
+{
+	const struct ntb_msit_peer *p = data;
+
+	*count = p->num_irqs;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_count_fops, ntb_msit_dbgfs_count_get,
+			 NULL, "%llu\n");
+
+/* debugfs "ready" read: non-blocking check of the peer's init_comp. */
+static int ntb_msit_dbgfs_ready_get(void *data, u64 *ready)
+{
+	struct ntb_msit_peer *p = data;
+
+	*ready = try_wait_for_completion(&p->init_comp);
+	return 0;
+}
+
+static int ntb_msit_dbgfs_ready_set(void *data, u64 ready)
+{
+	struct ntb_msit_peer *p = data;	/* the written value is ignored */
+
+	return wait_for_completion_interruptible(&p->init_comp);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_ready_fops, ntb_msit_dbgfs_ready_get,
+			 ntb_msit_dbgfs_ready_set, "%llu\n");
+
+/* debugfs "irqN_occurrences" read: how often the local IRQ has fired. */
+static int ntb_msit_dbgfs_occurrences_get(void *data, u64 *occurrences)
+{
+	const struct ntb_msit_isr_ctx *ictx = data;
+
+	*occurrences = ictx->occurrences;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_occurrences_fops,
+			 ntb_msit_dbgfs_occurrences_get,
+			 NULL, "%llu\n");
+
+/* debugfs top-level "port" read: report the local port number. */
+static int ntb_msit_dbgfs_local_port_get(void *data, u64 *port)
+{
+	const struct ntb_msit_ctx *ctx = data;
+
+	*port = ntb_port_number(ctx->ntb);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_local_port_fops,
+			 ntb_msit_dbgfs_local_port_get,
+			 NULL, "%llu\n");
+
+/*
+ * Build <topdir>/<pci name>/ with the local "port" file, one peerN/
+ * directory per peer port and one irqN_occurrences counter per local IRQ.
+ * Also initializes the per-peer bookkeeping the debugfs files rely on.
+ */
+static void ntb_msit_create_dbgfs(struct ntb_msit_ctx *nm)
+{
+	struct ntb_msit_peer *p;
+	struct dentry *dir;
+	char name[32];
+	int i;
+
+	nm->dbgfs_dir = debugfs_create_dir(pci_name(nm->ntb->pdev),
+					   ntb_msit_dbgfs_topdir);
+	debugfs_create_file("port", 0400, nm->dbgfs_dir, nm,
+			    &ntb_msit_local_port_fops);
+
+	for (i = 0; i < ntb_peer_port_count(nm->ntb); i++) {
+		p = &nm->peers[i];
+		p->pidx = i;
+		p->nm = nm;
+		init_completion(&p->init_comp);
+
+		snprintf(name, sizeof(name), "peer%d", i);
+		dir = debugfs_create_dir(name, nm->dbgfs_dir);
+
+		debugfs_create_file_unsafe("trigger", 0200, dir, p,
+					   &ntb_msit_trigger_fops);
+		debugfs_create_file_unsafe("port", 0400, dir, p,
+					   &ntb_msit_port_fops);
+		debugfs_create_file_unsafe("count", 0400, dir, p,
+					   &ntb_msit_count_fops);
+		debugfs_create_file_unsafe("ready", 0600, dir, p,
+					   &ntb_msit_ready_fops);
+	}
+
+	for (i = 0; i < num_irqs; i++) {
+		snprintf(name, sizeof(name), "irq%d_occurrences", i);
+		debugfs_create_file_unsafe(name, 0400, nm->dbgfs_dir,
+					   &nm->isr_ctx[i],
+					   &ntb_msit_occurrences_fops);
+	}
+}
+
+static void ntb_msit_remove_dbgfs(struct ntb_msit_ctx *nm)
+{
+	debugfs_remove_recursive(nm->dbgfs_dir); /* whole per-device tree */
+}
+
+/* Client probe: check spads, init MSI, create debugfs, enable the link. */
+static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct ntb_msit_ctx *nm;
+	size_t struct_size;
+	int peers;
+	int ret;
+
+	peers = ntb_peer_port_count(ntb);
+	if (peers <= 0)
+		return -EINVAL;
+
+	if (ntb_spad_is_unsafe(ntb) || ntb_spad_count(ntb) < 2 * num_irqs + 1) {
+		dev_err(&ntb->dev, "NTB MSI test requires at least %d spads for %d irqs\n",
+			2 * num_irqs + 1, num_irqs);
+		return -EFAULT;
+	}
+
+	/* spad 0 == -1 tells peers that no IRQ count is published yet */
+	ret = ntb_spad_write(ntb, 0, -1);
+	if (ret) {
+		dev_err(&ntb->dev, "Unable to write spads: %d\n", ret);
+		return ret;
+	}
+
+	ret = ntb_db_clear_mask(ntb, GENMASK(peers - 1, 0));
+	if (ret) {
+		dev_err(&ntb->dev, "Unable to clear doorbell mask: %d\n", ret);
+		return ret;
+	}
+
+	ret = ntb_msi_init(ntb, ntb_msit_desc_changed);
+	if (ret) {
+		dev_err(&ntb->dev, "Unable to initialize MSI library: %d\n",
+			ret);
+		return ret;
+	}
+
+	struct_size = sizeof(*nm) + sizeof(*nm->peers) * peers;
+	nm = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+	if (!nm)
+		return -ENOMEM;
+
+	nm->isr_ctx = devm_kcalloc(&ntb->dev, num_irqs, sizeof(*nm->isr_ctx),
+				   GFP_KERNEL);
+	if (!nm->isr_ctx) {
+		ret = -ENOMEM;
+		goto free_ctx;
+	}
+
+	INIT_WORK(&nm->setup_work, ntb_msit_setup_work);
+	nm->ntb = ntb;
+
+	ntb_msit_create_dbgfs(nm);
+
+	ret = ntb_set_ctx(ntb, nm, &ntb_msit_ops);
+	if (ret)
+		goto remove_dbgfs;
+
+	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	return 0;
+
+remove_dbgfs:
+	ntb_msit_remove_dbgfs(nm);
+	devm_kfree(&ntb->dev, nm->isr_ctx);
+free_ctx:
+	devm_kfree(&ntb->dev, nm);
+	return ret;
+}
+
+static void ntb_msit_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct ntb_msit_ctx *nm = ntb->ctx;
+	int i;
+
+	ntb_link_disable(ntb);
+	ntb_db_set_mask(ntb, ntb_db_valid_mask(ntb));
+	ntb_msi_clear_mws(ntb);
+	/* free the descriptor copies allocated in ntb_msit_db_event() */
+	for (i = 0; i < ntb_peer_port_count(ntb); i++)
+		kfree(nm->peers[i].msi_desc);
+	/* NOTE(review): setup_work is never cancelled or flushed - confirm */
+	ntb_clear_ctx(ntb);
+	ntb_msit_remove_dbgfs(nm);
+}
+
+static struct ntb_client ntb_msit_client = {
+	.ops = {
+		.probe = ntb_msit_probe,	/* sets up one ntb_msit_ctx */
+		.remove = ntb_msit_remove
+	}
+};
+
+/* Module init: create the debugfs root if possible, register the client. */
+static int __init ntb_msit_init(void)
+{
+	int rc;
+
+	if (debugfs_initialized())
+		ntb_msit_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME,
+							   NULL);
+	rc = ntb_register_client(&ntb_msit_client);
+	if (!rc)
+		return 0;
+	debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
+	return rc;
+}
+module_init(ntb_msit_init);
+
+static void __exit ntb_msit_exit(void)
+{
+	ntb_unregister_client(&ntb_msit_client);
+	debugfs_remove_recursive(ntb_msit_dbgfs_topdir); /* module-wide root */
+}
+module_exit(ntb_msit_exit);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 11a6cd374004..d028331558ea 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -100,7 +100,7 @@ MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
 #define DMA_TRIES		100
 #define DMA_MDELAY		10
 
-#define MSG_TRIES		500
+#define MSG_TRIES		1000
 #define MSG_UDELAY_LOW		1000
 #define MSG_UDELAY_HIGH		2000
 
@@ -734,8 +734,6 @@ static void perf_disable_service(struct perf_ctx *perf)
 {
 	int pidx;
 
-	ntb_link_disable(perf->ntb);
-
 	if (perf->cmd_send == perf_msg_cmd_send) {
 		u64 inbits;
 
@@ -752,6 +750,16 @@ static void perf_disable_service(struct perf_ctx *perf)
 
 	for (pidx = 0; pidx < perf->pcnt; pidx++)
 		flush_work(&perf->peers[pidx].service);
+
+	for (pidx = 0; pidx < perf->pcnt; pidx++) {
+		struct perf_peer *peer = &perf->peers[pidx];
+
+		ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
+	}
+
+	ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
+
+	ntb_link_disable(perf->ntb);
 }
 
 /*==============================================================================
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 62d00fffa4af..3508a79110c7 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -62,14 +62,14 @@ static ssize_t sector_size_store(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
 			btt_lbasize_supported);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -91,11 +91,11 @@ static ssize_t uuid_store(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -120,13 +120,13 @@ static ssize_t namespace_store(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -138,14 +138,14 @@ static ssize_t size_show(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver)
 		rc = sprintf(buf, "%llu\n", nd_btt->size);
 	else {
 		/* no size to convey if the btt instance is disabled */
 		rc = -ENXIO;
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 2dca3034fee0..798c5c4aea9c 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -26,7 +26,7 @@
 
 int nvdimm_major;
 static int nvdimm_bus_major;
-static struct class *nd_class;
+struct class *nd_class;
 static DEFINE_IDA(nd_ida);
 
 static int to_nd_device_type(struct device *dev)
@@ -73,7 +73,7 @@ static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
 {
 	nvdimm_bus_lock(&nvdimm_bus->dev);
 	if (--nvdimm_bus->probe_active == 0)
-		wake_up(&nvdimm_bus->probe_wait);
+		wake_up(&nvdimm_bus->wait);
 	nvdimm_bus_unlock(&nvdimm_bus->dev);
 }
 
@@ -91,7 +91,10 @@ static int nvdimm_bus_probe(struct device *dev)
 			dev->driver->name, dev_name(dev));
 
 	nvdimm_bus_probe_start(nvdimm_bus);
+	debug_nvdimm_lock(dev);
 	rc = nd_drv->probe(dev);
+	debug_nvdimm_unlock(dev);
+
 	if (rc == 0)
 		nd_region_probe_success(nvdimm_bus, dev);
 	else
@@ -113,8 +116,11 @@ static int nvdimm_bus_remove(struct device *dev)
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 	int rc = 0;
 
-	if (nd_drv->remove)
+	if (nd_drv->remove) {
+		debug_nvdimm_lock(dev);
 		rc = nd_drv->remove(dev);
+		debug_nvdimm_unlock(dev);
+	}
 	nd_region_disable(nvdimm_bus, dev);
 
 	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
@@ -140,7 +146,7 @@ static void nvdimm_bus_shutdown(struct device *dev)
 
 void nd_device_notify(struct device *dev, enum nvdimm_event event)
 {
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver) {
 		struct nd_device_driver *nd_drv;
 
@@ -148,7 +154,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event)
 		if (nd_drv->notify)
 			nd_drv->notify(dev, event);
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 }
 EXPORT_SYMBOL(nd_device_notify);
 
@@ -296,7 +302,7 @@ static void nvdimm_bus_release(struct device *dev)
 	kfree(nvdimm_bus);
 }
 
-static bool is_nvdimm_bus(struct device *dev)
+bool is_nvdimm_bus(struct device *dev)
 {
 	return dev->release == nvdimm_bus_release;
 }
@@ -341,7 +347,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		return NULL;
 	INIT_LIST_HEAD(&nvdimm_bus->list);
 	INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
-	init_waitqueue_head(&nvdimm_bus->probe_wait);
+	init_waitqueue_head(&nvdimm_bus->wait);
 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
 	if (nvdimm_bus->id < 0) {
 		kfree(nvdimm_bus);
@@ -426,6 +432,9 @@ static int nd_bus_remove(struct device *dev)
 	list_del_init(&nvdimm_bus->list);
 	mutex_unlock(&nvdimm_bus_list_mutex);
 
+	wait_event(nvdimm_bus->wait,
+			atomic_read(&nvdimm_bus->ioctl_active) == 0);
+
 	nd_synchronize();
 	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
 
@@ -547,13 +556,38 @@ EXPORT_SYMBOL(nd_device_register);
 
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
 {
+	bool killed;
+
 	switch (mode) {
 	case ND_ASYNC:
+		/*
+		 * In the async case this is being triggered with the
+		 * device lock held and the unregistration work needs to
+		 * be moved out of line iff this is thread has won the
+		 * race to schedule the deletion.
+		 */
+		if (!kill_device(dev))
+			return;
+
 		get_device(dev);
 		async_schedule_domain(nd_async_device_unregister, dev,
 				&nd_async_domain);
 		break;
 	case ND_SYNC:
+		/*
+		 * In the sync case the device is being unregistered due
+		 * to a state change of the parent. Claim the kill state
+		 * to synchronize against other unregistration requests,
+		 * or otherwise let the async path handle it if the
+		 * unregistration was already queued.
+		 */
+		nd_device_lock(dev);
+		killed = kill_device(dev);
+		nd_device_unlock(dev);
+
+		if (!killed)
+			return;
+
 		nd_synchronize();
 		device_unregister(dev);
 		break;
@@ -859,10 +893,12 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
 	do {
 		if (nvdimm_bus->probe_active == 0)
 			break;
-		nvdimm_bus_unlock(&nvdimm_bus->dev);
-		wait_event(nvdimm_bus->probe_wait,
+		nvdimm_bus_unlock(dev);
+		nd_device_unlock(dev);
+		wait_event(nvdimm_bus->wait,
 				nvdimm_bus->probe_active == 0);
-		nvdimm_bus_lock(&nvdimm_bus->dev);
+		nd_device_lock(dev);
+		nvdimm_bus_lock(dev);
 	} while (true);
 }
 
@@ -945,20 +981,19 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		int read_only, unsigned int ioctl_cmd, unsigned long arg)
 {
 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
-	static char out_env[ND_CMD_MAX_ENVELOPE];
-	static char in_env[ND_CMD_MAX_ENVELOPE];
 	const struct nd_cmd_desc *desc = NULL;
 	unsigned int cmd = _IOC_NR(ioctl_cmd);
 	struct device *dev = &nvdimm_bus->dev;
 	void __user *p = (void __user *) arg;
+	char *out_env = NULL, *in_env = NULL;
 	const char *cmd_name, *dimm_name;
 	u32 in_len = 0, out_len = 0;
 	unsigned int func = cmd;
 	unsigned long cmd_mask;
 	struct nd_cmd_pkg pkg;
 	int rc, i, cmd_rc;
+	void *buf = NULL;
 	u64 buf_len = 0;
-	void *buf;
 
 	if (nvdimm) {
 		desc = nd_cmd_dimm_desc(cmd);
@@ -989,7 +1024,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		case ND_CMD_ARS_START:
 		case ND_CMD_CLEAR_ERROR:
 		case ND_CMD_CALL:
-			dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
+			dev_dbg(dev, "'%s' command while read-only.\n",
 					nvdimm ? nvdimm_cmd_name(cmd)
 					: nvdimm_bus_cmd_name(cmd));
 			return -EPERM;
@@ -998,6 +1033,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		}
 
 	/* process an input envelope */
+	in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+	if (!in_env)
+		return -ENOMEM;
 	for (i = 0; i < desc->in_num; i++) {
 		u32 in_size, copy;
 
@@ -1005,14 +1043,17 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		if (in_size == UINT_MAX) {
 			dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
 					__func__, dimm_name, cmd_name, i);
-			return -ENXIO;
+			rc = -ENXIO;
+			goto out;
 		}
-		if (in_len < sizeof(in_env))
-			copy = min_t(u32, sizeof(in_env) - in_len, in_size);
+		if (in_len < ND_CMD_MAX_ENVELOPE)
+			copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
 		else
 			copy = 0;
-		if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
-			return -EFAULT;
+		if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
+			rc = -EFAULT;
+			goto out;
+		}
 		in_len += in_size;
 	}
 
@@ -1024,6 +1065,12 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	}
 
 	/* process an output envelope */
+	out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+	if (!out_env) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
 	for (i = 0; i < desc->out_num; i++) {
 		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
 				(u32 *) in_env, (u32 *) out_env, 0);
@@ -1032,15 +1079,18 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		if (out_size == UINT_MAX) {
 			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
 					dimm_name, cmd_name, i);
-			return -EFAULT;
+			rc = -EFAULT;
+			goto out;
 		}
-		if (out_len < sizeof(out_env))
-			copy = min_t(u32, sizeof(out_env) - out_len, out_size);
+		if (out_len < ND_CMD_MAX_ENVELOPE)
+			copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
 		else
 			copy = 0;
 		if (copy && copy_from_user(&out_env[out_len],
-					p + in_len + out_len, copy))
-			return -EFAULT;
+					p + in_len + out_len, copy)) {
+			rc = -EFAULT;
+			goto out;
+		}
 		out_len += out_size;
 	}
 
@@ -1048,19 +1098,23 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
 		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
 				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto out;
 	}
 
 	buf = vmalloc(buf_len);
-	if (!buf)
-		return -ENOMEM;
+	if (!buf) {
+		rc = -ENOMEM;
+		goto out;
+	}
 
 	if (copy_from_user(buf, p, buf_len)) {
 		rc = -EFAULT;
 		goto out;
 	}
 
-	nvdimm_bus_lock(&nvdimm_bus->dev);
+	nd_device_lock(dev);
+	nvdimm_bus_lock(dev);
 	rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
 	if (rc)
 		goto out_unlock;
@@ -1075,39 +1129,24 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
 				clear_err->cleared);
 	}
-	nvdimm_bus_unlock(&nvdimm_bus->dev);
 
 	if (copy_to_user(p, buf, buf_len))
 		rc = -EFAULT;
 
-	vfree(buf);
-	return rc;
-
- out_unlock:
-	nvdimm_bus_unlock(&nvdimm_bus->dev);
- out:
+out_unlock:
+	nvdimm_bus_unlock(dev);
+	nd_device_unlock(dev);
+out:
+	kfree(in_env);
+	kfree(out_env);
 	vfree(buf);
 	return rc;
 }
 
-static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	long id = (long) file->private_data;
-	int rc = -ENXIO, ro;
-	struct nvdimm_bus *nvdimm_bus;
-
-	ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
-	mutex_lock(&nvdimm_bus_list_mutex);
-	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
-		if (nvdimm_bus->id == id) {
-			rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg);
-			break;
-		}
-	}
-	mutex_unlock(&nvdimm_bus_list_mutex);
-
-	return rc;
-}
+enum nd_ioctl_mode {
+	BUS_IOCTL,
+	DIMM_IOCTL,
+};
 
 static int match_dimm(struct device *dev, void *data)
 {
@@ -1122,31 +1161,62 @@ static int match_dimm(struct device *dev, void *data)
 	return 0;
 }
 
-static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+		enum nd_ioctl_mode mode)
+
 {
-	int rc = -ENXIO, ro;
-	struct nvdimm_bus *nvdimm_bus;
+	struct nvdimm_bus *nvdimm_bus, *found = NULL;
+	long id = (long) file->private_data;
+	struct nvdimm *nvdimm = NULL;
+	int rc, ro;
 
 	ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
 	mutex_lock(&nvdimm_bus_list_mutex);
 	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
-		struct device *dev = device_find_child(&nvdimm_bus->dev,
-				file->private_data, match_dimm);
-		struct nvdimm *nvdimm;
-
-		if (!dev)
-			continue;
+		if (mode == DIMM_IOCTL) {
+			struct device *dev;
+
+			dev = device_find_child(&nvdimm_bus->dev,
+					file->private_data, match_dimm);
+			if (!dev)
+				continue;
+			nvdimm = to_nvdimm(dev);
+			found = nvdimm_bus;
+		} else if (nvdimm_bus->id == id) {
+			found = nvdimm_bus;
+		}
 
-		nvdimm = to_nvdimm(dev);
-		rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
-		put_device(dev);
-		break;
+		if (found) {
+			atomic_inc(&nvdimm_bus->ioctl_active);
+			break;
+		}
 	}
 	mutex_unlock(&nvdimm_bus_list_mutex);
 
+	if (!found)
+		return -ENXIO;
+
+	nvdimm_bus = found;
+	rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
+
+	if (nvdimm)
+		put_device(&nvdimm->dev);
+	if (atomic_dec_and_test(&nvdimm_bus->ioctl_active))
+		wake_up(&nvdimm_bus->wait);
+
 	return rc;
 }
 
+static long bus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return nd_ioctl(file, cmd, arg, BUS_IOCTL);
+}
+
+static long dimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return nd_ioctl(file, cmd, arg, DIMM_IOCTL);
+}
+
 static int nd_open(struct inode *inode, struct file *file)
 {
 	long minor = iminor(inode);
@@ -1158,16 +1228,16 @@ static int nd_open(struct inode *inode, struct file *file)
 static const struct file_operations nvdimm_bus_fops = {
 	.owner = THIS_MODULE,
 	.open = nd_open,
-	.unlocked_ioctl = nd_ioctl,
-	.compat_ioctl = nd_ioctl,
+	.unlocked_ioctl = bus_ioctl,
+	.compat_ioctl = bus_ioctl,
 	.llseek = noop_llseek,
 };
 
 static const struct file_operations nvdimm_fops = {
 	.owner = THIS_MODULE,
 	.open = nd_open,
-	.unlocked_ioctl = nvdimm_ioctl,
-	.compat_ioctl = nvdimm_ioctl,
+	.unlocked_ioctl = dimm_ioctl,
+	.compat_ioctl = dimm_ioctl,
 	.llseek = noop_llseek,
 };
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 5e1f060547bf..9204f1e9fd14 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -246,7 +246,7 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
  *
  * Enforce that uuids can only be changed while the device is disabled
  * (driver detached)
- * LOCKING: expects device_lock() is held on entry
+ * LOCKING: expects nd_device_lock() is held on entry
  */
 int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
 		size_t len)
@@ -347,15 +347,15 @@ static DEVICE_ATTR_RO(provider);
 
 static int flush_namespaces(struct device *dev, void *data)
 {
-	device_lock(dev);
-	device_unlock(dev);
+	nd_device_lock(dev);
+	nd_device_unlock(dev);
 	return 0;
 }
 
 static int flush_regions_dimms(struct device *dev, void *data)
 {
-	device_lock(dev);
-	device_unlock(dev);
+	nd_device_lock(dev);
+	nd_device_unlock(dev);
 	device_for_each_child(dev, NULL, flush_namespaces);
 	return 0;
 }
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index dfecd6e17043..29a065e769ea 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -484,12 +484,12 @@ static ssize_t security_store(struct device *dev,
 	 * done while probing is idle and the DIMM is not in active use
 	 * in any region.
 	 */
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __security_store(dev, buf, len);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 2d8d7e554877..a16e52251a30 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -410,7 +410,7 @@ static ssize_t alt_name_store(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __alt_name_store(dev, buf, len);
@@ -418,7 +418,7 @@ static ssize_t alt_name_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1077,7 +1077,7 @@ static ssize_t size_store(struct device *dev,
 	if (rc)
 		return rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __size_store(dev, val);
@@ -1103,7 +1103,7 @@ static ssize_t size_store(struct device *dev,
 	dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1286,7 +1286,7 @@ static ssize_t uuid_store(struct device *dev,
 	} else
 		return -ENXIO;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	if (to_ndns(dev)->claim)
@@ -1302,7 +1302,7 @@ static ssize_t uuid_store(struct device *dev,
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1376,7 +1376,7 @@ static ssize_t sector_size_store(struct device *dev,
 	} else
 		return -ENXIO;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	if (to_ndns(dev)->claim)
 		rc = -EBUSY;
@@ -1387,7 +1387,7 @@ static ssize_t sector_size_store(struct device *dev,
 	dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
 			buf, buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -1502,9 +1502,9 @@ static ssize_t holder_show(struct device *dev,
 	struct nd_namespace_common *ndns = to_ndns(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -1541,7 +1541,7 @@ static ssize_t holder_class_store(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __holder_class_store(dev, buf);
@@ -1549,7 +1549,7 @@ static ssize_t holder_class_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1560,7 +1560,7 @@ static ssize_t holder_class_show(struct device *dev,
 	struct nd_namespace_common *ndns = to_ndns(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (ndns->claim_class == NVDIMM_CCLASS_NONE)
 		rc = sprintf(buf, "\n");
 	else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
@@ -1572,7 +1572,7 @@ static ssize_t holder_class_show(struct device *dev,
 		rc = sprintf(buf, "dax\n");
 	else
 		rc = sprintf(buf, "<unknown>\n");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -1586,7 +1586,7 @@ static ssize_t mode_show(struct device *dev,
 	char *mode;
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	claim = ndns->claim;
 	if (claim && is_nd_btt(claim))
 		mode = "safe";
@@ -1599,7 +1599,7 @@ static ssize_t mode_show(struct device *dev,
 	else
 		mode = "raw";
 	rc = sprintf(buf, "%s\n", mode);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -1703,8 +1703,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		 * Flush any in-progess probes / removals in the driver
 		 * for the raw personality of this namespace.
 		 */
-		device_lock(&ndns->dev);
-		device_unlock(&ndns->dev);
+		nd_device_lock(&ndns->dev);
+		nd_device_unlock(&ndns->dev);
 		if (ndns->dev.driver) {
 			dev_dbg(&ndns->dev, "is active, can't bind %s\n",
 					dev_name(dev));
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 391e88de3a29..0ac52b6eb00e 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -9,6 +9,7 @@
 #include <linux/sizes.h>
 #include <linux/mutex.h>
 #include <linux/nd.h>
+#include "nd.h"
 
 extern struct list_head nvdimm_bus_list;
 extern struct mutex nvdimm_bus_list_mutex;
@@ -17,10 +18,11 @@ extern struct workqueue_struct *nvdimm_wq;
 
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
-	wait_queue_head_t probe_wait;
+	wait_queue_head_t wait;
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
+	atomic_t ioctl_active;
 	struct list_head mapping_list;
 	struct mutex reconfig_mutex;
 	struct badrange badrange;
@@ -181,4 +183,71 @@ ssize_t nd_namespace_store(struct device *dev,
 		struct nd_namespace_common **_ndns, const char *buf,
 		size_t len);
 struct nd_pfn *to_nd_pfn_safe(struct device *dev);
+bool is_nvdimm_bus(struct device *dev);
+
+#ifdef CONFIG_PROVE_LOCKING
+extern struct class *nd_class;
+
+enum {
+	LOCK_BUS,
+	LOCK_NDCTL,
+	LOCK_REGION,
+	LOCK_DIMM = LOCK_REGION,
+	LOCK_NAMESPACE,
+	LOCK_CLAIM,
+};
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+	if (is_nd_region(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION);
+	else if (is_nvdimm(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM);
+	else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM);
+	else if (dev->parent && (is_nd_region(dev->parent)))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE);
+	else if (is_nvdimm_bus(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS);
+	else if (dev->class && dev->class == nd_class)
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL);
+	else
+		dev_WARN(dev, "unknown lock level\n");
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+	mutex_unlock(&dev->lockdep_mutex);
+}
+
+static inline void nd_device_lock(struct device *dev)
+{
+	device_lock(dev);
+	debug_nvdimm_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+	debug_nvdimm_unlock(dev);
+	device_unlock(dev);
+}
+#else
+static inline void nd_device_lock(struct device *dev)
+{
+	device_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+	device_unlock(dev);
+}
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+}
+#endif
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index df2bdbd22450..3e7b11cf1aae 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -67,7 +67,7 @@ static ssize_t mode_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc = 0;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	if (dev->driver)
 		rc = -EBUSY;
@@ -89,7 +89,7 @@ static ssize_t mode_store(struct device *dev,
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -132,14 +132,14 @@ static ssize_t align_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
 			nd_pfn_supported_alignments());
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -161,11 +161,11 @@ static ssize_t uuid_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -190,13 +190,13 @@ static ssize_t namespace_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -208,7 +208,7 @@ static ssize_t resource_show(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver) {
 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -222,7 +222,7 @@ static ssize_t resource_show(struct device *dev,
 		/* no address to convey if the pfn instance is disabled */
 		rc = -ENXIO;
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -234,7 +234,7 @@ static ssize_t size_show(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver) {
 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -250,7 +250,7 @@ static ssize_t size_show(struct device *dev,
 		/* no size to convey if the pfn instance is disabled */
 		rc = -ENXIO;
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2bf3acd69613..4c121dd03dd9 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -522,8 +522,8 @@ static int nd_pmem_remove(struct device *dev)
 		nvdimm_namespace_detach_btt(to_nd_btt(dev));
 	else {
 		/*
-		 * Note, this assumes device_lock() context to not race
-		 * nd_pmem_notify()
+		 * Note, this assumes nd_device_lock() context to not
+		 * race nd_pmem_notify()
 		 */
 		sysfs_put(pmem->bb_state);
 		pmem->bb_state = NULL;
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index ef46cc3a71ae..37bf8719a2a4 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -34,17 +34,6 @@ static int nd_region_probe(struct device *dev)
 	if (rc)
 		return rc;
 
-	rc = nd_region_register_namespaces(nd_region, &err);
-	if (rc < 0)
-		return rc;
-
-	ndrd = dev_get_drvdata(dev);
-	ndrd->ns_active = rc;
-	ndrd->ns_count = rc + err;
-
-	if (rc && err && rc == err)
-		return -ENODEV;
-
 	if (is_nd_pmem(&nd_region->dev)) {
 		struct resource ndr_res;
 
@@ -60,6 +49,17 @@ static int nd_region_probe(struct device *dev)
 		nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
 	}
 
+	rc = nd_region_register_namespaces(nd_region, &err);
+	if (rc < 0)
+		return rc;
+
+	ndrd = dev_get_drvdata(dev);
+	ndrd->ns_active = rc;
+	ndrd->ns_count = rc + err;
+
+	if (rc && err && rc == err)
+		return -ENODEV;
+
 	nd_region->btt_seed = nd_btt_create(nd_region);
 	nd_region->pfn_seed = nd_pfn_create(nd_region);
 	nd_region->dax_seed = nd_dax_create(nd_region);
@@ -102,7 +102,7 @@ static int nd_region_remove(struct device *dev)
 	nvdimm_bus_unlock(dev);
 
 	/*
-	 * Note, this assumes device_lock() context to not race
+	 * Note, this assumes nd_device_lock() context to not race
 	 * nd_region_notify()
 	 */
 	sysfs_put(nd_region->bb_state);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 56f2227f192a..af30cbe7a8ea 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -331,7 +331,7 @@ static ssize_t set_cookie_show(struct device *dev,
 	 * the v1.1 namespace label cookie definition. To read all this
 	 * data we need to wait for probing to settle.
 	 */
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	if (nd_region->ndr_mappings) {
@@ -348,7 +348,7 @@ static ssize_t set_cookie_show(struct device *dev,
 		}
 	}
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	if (rc)
 		return rc;
@@ -424,10 +424,12 @@ static ssize_t available_size_show(struct device *dev,
 	 * memory nvdimm_bus_lock() is dropped, but that's userspace's
 	 * problem to not race itself.
 	 */
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	available = nd_region_available_dpa(nd_region);
 	nvdimm_bus_unlock(dev);
+	nd_device_unlock(dev);
 
 	return sprintf(buf, "%llu\n", available);
 }
@@ -439,10 +441,12 @@ static ssize_t max_available_extent_show(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev);
 	unsigned long long available = 0;
 
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	available = nd_region_allocatable_dpa(nd_region);
 	nvdimm_bus_unlock(dev);
+	nd_device_unlock(dev);
 
 	return sprintf(buf, "%llu\n", available);
 }
@@ -561,12 +565,12 @@ static ssize_t region_badblocks_show(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver)
 		rc = badblocks_show(&nd_region->bb, buf, 0);
 	else
 		rc = -ENXIO;
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cc09b81fc7f4..8f3fbe5ca937 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2311,17 +2311,15 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct
 	memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off);
 }
 
-static void __nvme_release_subsystem(struct nvme_subsystem *subsys)
+static void nvme_release_subsystem(struct device *dev)
 {
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+
 	ida_simple_remove(&nvme_subsystems_ida, subsys->instance);
 	kfree(subsys);
 }
 
-static void nvme_release_subsystem(struct device *dev)
-{
-	__nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev));
-}
-
 static void nvme_destroy_subsystem(struct kref *ref)
 {
 	struct nvme_subsystem *subsys =
@@ -2477,7 +2475,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	mutex_lock(&nvme_subsystems_lock);
 	found = __nvme_find_get_subsystem(subsys->subnqn);
 	if (found) {
-		__nvme_release_subsystem(subsys);
+		put_device(&subsys->dev);
 		subsys = found;
 
 		if (!nvme_validate_cntlid(subsys, ctrl, id)) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a9a927677970..4f0d0d12744e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,11 +12,6 @@ module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
 	"turn on native support for multiple controllers per subsystem");
 
-inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
-{
-	return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3));
-}
-
 /*
  * If multipathing is enabled we need to always use the subsystem instance
  * number for numbering our devices to avoid conflicts between subsystems that
@@ -622,7 +617,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 {
 	int error;
 
-	if (!nvme_ctrl_use_ana(ctrl))
+	/* check if multipath is enabled and we have the capability */
+	if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
 		return 0;
 
 	ctrl->anacap = id->anacap;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 716a876119c8..26b563f9985b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -485,7 +485,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[];
 extern const struct block_device_operations nvme_ns_head_ops;
 
 #ifdef CONFIG_NVME_MULTIPATH
-bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
+static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+	return ctrl->ana_log_buf != NULL;
+}
+
 void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 			struct nvme_ctrl *ctrl, int *flags);
 void nvme_failover_req(struct request *req);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bb970ca82517..db160cee42ad 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2254,9 +2254,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	if (!dev->ctrl.tagset) {
 		dev->tagset.ops = &nvme_mq_ops;
 		dev->tagset.nr_hw_queues = dev->online_queues - 1;
-		dev->tagset.nr_maps = 1; /* default */
-		if (dev->io_queues[HCTX_TYPE_READ])
-			dev->tagset.nr_maps++;
+		dev->tagset.nr_maps = 2; /* default + read */
 		if (dev->io_queues[HCTX_TYPE_POLL])
 			dev->tagset.nr_maps++;
 		dev->tagset.timeout = NVME_IO_TIMEOUT;
@@ -3029,6 +3027,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_LIGHTNVM, },
 	{ PCI_DEVICE(0x1d1d, 0x2601),	/* CNEX Granby */
 		.driver_data = NVME_QUIRK_LIGHTNVM, },
+	{ PCI_DEVICE(0x10ec, 0x5762),   /* ADATA SX6000LNP */
+		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 59a6d232f77a..0884bedcfc7a 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 
 static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
 {
+	if (desc->msi_attrib.is_virtual)
+		return NULL;
+
 	return desc->mask_base +
 		desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
 }
@@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
 u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
 {
 	u32 mask_bits = desc->masked;
+	void __iomem *desc_addr;
 
 	if (pci_msi_ignore_mask)
 		return 0;
+	desc_addr = pci_msix_desc_addr(desc);
+	if (!desc_addr)
+		return 0;
 
 	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
 	if (flag)
 		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
-	writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+	writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
 
 	return mask_bits;
 }
@@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 	if (entry->msi_attrib.is_msix) {
 		void __iomem *base = pci_msix_desc_addr(entry);
 
+		if (!base) {
+			WARN_ON(1);
+			return;
+		}
+
 		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
 		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
 		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
@@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 	} else if (entry->msi_attrib.is_msix) {
 		void __iomem *base = pci_msix_desc_addr(entry);
 
+		if (!base)
+			goto skip;
+
 		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
 		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
 		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
@@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 					      msg->data);
 		}
 	}
+
+skip:
 	entry->msg = *msg;
+
+	if (entry->write_msi_msg)
+		entry->write_msi_msg(entry, entry->write_msi_msg_data);
+
 }
 
 void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
@@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
 
 	entry->msi_attrib.is_msix	= 0;
 	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
+	entry->msi_attrib.is_virtual    = 0;
 	entry->msi_attrib.entry_nr	= 0;
 	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
@@ -674,6 +697,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 	struct irq_affinity_desc *curmsk, *masks = NULL;
 	struct msi_desc *entry;
 	int ret, i;
+	int vec_count = pci_msix_vec_count(dev);
 
 	if (affd)
 		masks = irq_create_affinity_masks(nvec, affd);
@@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			entry->msi_attrib.entry_nr = entries[i].entry;
 		else
 			entry->msi_attrib.entry_nr = i;
+
+		entry->msi_attrib.is_virtual =
+			entry->msi_attrib.entry_nr >= vec_count;
+
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
 
@@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev,
 {
 	struct msi_desc *entry;
 	int i = 0;
+	void __iomem *desc_addr;
 
 	for_each_pci_msi_entry(entry, dev) {
 		if (entries)
 			entries[i++].vector = entry->irq;
-		entry->masked = readl(pci_msix_desc_addr(entry) +
-				PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+		desc_addr = pci_msix_desc_addr(entry);
+		if (desc_addr)
+			entry->masked = readl(desc_addr +
+					      PCI_MSIX_ENTRY_VECTOR_CTRL);
+		else
+			entry->masked = 0;
+
 		msix_mask_irq(entry, 1);
 	}
 }
@@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
 EXPORT_SYMBOL(pci_msix_vec_count);
 
 static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
-			     int nvec, struct irq_affinity *affd)
+			     int nvec, struct irq_affinity *affd, int flags)
 {
 	int nr_entries;
 	int i, j;
@@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
 	nr_entries = pci_msix_vec_count(dev);
 	if (nr_entries < 0)
 		return nr_entries;
-	if (nvec > nr_entries)
+	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
 		return nr_entries;
 
 	if (entries) {
@@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi);
 
 static int __pci_enable_msix_range(struct pci_dev *dev,
 				   struct msix_entry *entries, int minvec,
-				   int maxvec, struct irq_affinity *affd)
+				   int maxvec, struct irq_affinity *affd,
+				   int flags)
 {
 	int rc, nvec = maxvec;
 
@@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 				return -ENOSPC;
 		}
 
-		rc = __pci_enable_msix(dev, entries, nvec, affd);
+		rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
 		if (rc == 0)
 			return nvec;
 
@@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 		int minvec, int maxvec)
 {
-	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
+	return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
@@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 
 	if (flags & PCI_IRQ_MSIX) {
 		msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
-						    max_vecs, affd);
+						    max_vecs, affd, flags);
 		if (msix_vecs > 0)
 			return msix_vecs;
 	}
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index bebbde4ebec0..8c94cd3fd1f2 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -30,6 +30,10 @@ module_param(use_dma_mrpc, bool, 0644);
 MODULE_PARM_DESC(use_dma_mrpc,
 		 "Enable the use of the DMA MRPC feature");
 
+static int nirqs = 32;
+module_param(nirqs, int, 0644);
+MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)");
+
 static dev_t switchtec_devt;
 static DEFINE_IDA(switchtec_minor_ida);
 
@@ -1263,8 +1267,12 @@ static int switchtec_init_isr(struct switchtec_dev *stdev)
 	int dma_mrpc_irq;
 	int rc;
 
-	nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, 4,
-				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
+	if (nirqs < 4)
+		nirqs = 4;
+
+	nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, nirqs,
+				      PCI_IRQ_MSIX | PCI_IRQ_MSI |
+				      PCI_IRQ_VIRTUAL);
 	if (nvecs < 0)
 		return nvecs;
 
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 9fd6dd342169..6df481896b5f 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1454,7 +1454,7 @@ static void __exit rapl_exit(void)
 	unregister_pm_notifier(&rapl_pm_notifier);
 }
 
-module_init(rapl_init);
+fs_initcall(rapl_init);
 module_exit(rapl_exit);
 
 MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 540e8aafc990..f808c5fa9838 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -671,7 +671,7 @@ static int __init powercap_init(void)
 	return class_register(&powercap_class);
 }
 
-device_initcall(powercap_init);
+fs_initcall(powercap_init);
 
 MODULE_DESCRIPTION("PowerCap sysfs Driver");
 MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index e8fc28dba8df..96f0d34e9459 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/kthread.h>
+#include <linux/bug.h>
 #include "zfcp_ext.h"
 #include "zfcp_reqlist.h"
 
@@ -217,6 +218,12 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(enum zfcp_erp_act_type need,
 	struct zfcp_erp_action *erp_action;
 	struct zfcp_scsi_dev *zfcp_sdev;
 
+	if (WARN_ON_ONCE(need != ZFCP_ERP_ACTION_REOPEN_LUN &&
+			 need != ZFCP_ERP_ACTION_REOPEN_PORT &&
+			 need != ZFCP_ERP_ACTION_REOPEN_PORT_FORCED &&
+			 need != ZFCP_ERP_ACTION_REOPEN_ADAPTER))
+		return NULL;
+
 	switch (need) {
 	case ZFCP_ERP_ACTION_REOPEN_LUN:
 		zfcp_sdev = sdev_to_zfcp(sdev);
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index d94496ee6883..296bbc3c4606 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/blktrace_api.h>
+#include <linux/types.h>
 #include <linux/slab.h>
 #include <scsi/fc/fc_els.h>
 #include "zfcp_ext.h"
@@ -741,6 +742,7 @@ static struct zfcp_fsf_req *zfcp_fsf_req_create(struct zfcp_qdio *qdio,
 
 static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
 {
+	const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req);
 	struct zfcp_adapter *adapter = req->adapter;
 	struct zfcp_qdio *qdio = adapter->qdio;
 	int req_id = req->req_id;
@@ -757,8 +759,20 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
 		return -EIO;
 	}
 
+	/*
+	 * NOTE: DO NOT TOUCH ASYNC req PAST THIS POINT.
+	 *	 ONLY TOUCH SYNC req AGAIN ON req->completion.
+	 *
+	 * The request might complete and be freed concurrently at any point
+	 * now. This is not protected by the QDIO-lock (req_q_lock). So any
+	 * uncontrolled access after this might result in a use-after-free bug.
+	 * Only if the request doesn't have ZFCP_STATUS_FSFREQ_CLEANUP set, and
+	 * when it is completed via req->completion, is it safe to use req
+	 * again.
+	 */
+
 	/* Don't increase for unsolicited status */
-	if (!zfcp_fsf_req_is_status_read_buffer(req))
+	if (!is_srb)
 		adapter->fsf_req_seq_no++;
 	adapter->req_no++;
 
@@ -805,6 +819,7 @@ int zfcp_fsf_status_read(struct zfcp_qdio *qdio)
 	retval = zfcp_fsf_req_send(req);
 	if (retval)
 		goto failed_req_send;
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 
 	goto out;
 
@@ -914,8 +929,10 @@ struct zfcp_fsf_req *zfcp_fsf_abort_fcp_cmnd(struct scsi_cmnd *scmnd)
 	req->qtcb->bottom.support.req_handle = (u64) old_req_id;
 
 	zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
-	if (!zfcp_fsf_req_send(req))
+	if (!zfcp_fsf_req_send(req)) {
+		/* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */
 		goto out;
+	}
 
 out_error_free:
 	zfcp_fsf_req_free(req);
@@ -1098,6 +1115,7 @@ int zfcp_fsf_send_ct(struct zfcp_fc_wka_port *wka_port,
 	ret = zfcp_fsf_req_send(req);
 	if (ret)
 		goto failed_send;
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 
 	goto out;
 
@@ -1198,6 +1216,7 @@ int zfcp_fsf_send_els(struct zfcp_adapter *adapter, u32 d_id,
 	ret = zfcp_fsf_req_send(req);
 	if (ret)
 		goto failed_send;
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 
 	goto out;
 
@@ -1243,6 +1262,7 @@ int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req_id = 0;
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -1279,8 +1299,10 @@ int zfcp_fsf_exchange_config_data_sync(struct zfcp_qdio *qdio,
 	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
 	retval = zfcp_fsf_req_send(req);
 	spin_unlock_irq(&qdio->req_q_lock);
-	if (!retval)
+	if (!retval) {
+		/* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */
 		wait_for_completion(&req->completion);
+	}
 
 	zfcp_fsf_req_free(req);
 	return retval;
@@ -1330,6 +1352,7 @@ int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action)
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req_id = 0;
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -1372,8 +1395,10 @@ int zfcp_fsf_exchange_port_data_sync(struct zfcp_qdio *qdio,
 	retval = zfcp_fsf_req_send(req);
 	spin_unlock_irq(&qdio->req_q_lock);
 
-	if (!retval)
+	if (!retval) {
+		/* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */
 		wait_for_completion(&req->completion);
+	}
 
 	zfcp_fsf_req_free(req);
 
@@ -1493,6 +1518,7 @@ int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 		erp_action->fsf_req_id = 0;
 		put_device(&port->dev);
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -1557,6 +1583,7 @@ int zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req_id = 0;
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -1600,6 +1627,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
 {
 	struct zfcp_qdio *qdio = wka_port->adapter->qdio;
 	struct zfcp_fsf_req *req;
+	unsigned long req_id = 0;
 	int retval = -EIO;
 
 	spin_lock_irq(&qdio->req_q_lock);
@@ -1622,14 +1650,17 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port)
 	hton24(req->qtcb->bottom.support.d_id, wka_port->d_id);
 	req->data = wka_port;
 
+	req_id = req->req_id;
+
 	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
 	retval = zfcp_fsf_req_send(req);
 	if (retval)
 		zfcp_fsf_req_free(req);
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	if (!retval)
-		zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id);
+		zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req_id);
 	return retval;
 }
 
@@ -1655,6 +1686,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
 {
 	struct zfcp_qdio *qdio = wka_port->adapter->qdio;
 	struct zfcp_fsf_req *req;
+	unsigned long req_id = 0;
 	int retval = -EIO;
 
 	spin_lock_irq(&qdio->req_q_lock);
@@ -1677,14 +1709,17 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port)
 	req->data = wka_port;
 	req->qtcb->header.port_handle = wka_port->handle;
 
+	req_id = req->req_id;
+
 	zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT);
 	retval = zfcp_fsf_req_send(req);
 	if (retval)
 		zfcp_fsf_req_free(req);
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	if (!retval)
-		zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id);
+		zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req_id);
 	return retval;
 }
 
@@ -1776,6 +1811,7 @@ int zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req_id = 0;
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -1899,6 +1935,7 @@ int zfcp_fsf_open_lun(struct zfcp_erp_action *erp_action)
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req_id = 0;
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -1987,6 +2024,7 @@ int zfcp_fsf_close_lun(struct zfcp_erp_action *erp_action)
 		zfcp_fsf_req_free(req);
 		erp_action->fsf_req_id = 0;
 	}
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 out:
 	spin_unlock_irq(&qdio->req_q_lock);
 	return retval;
@@ -2299,6 +2337,7 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *scsi_cmnd)
 	retval = zfcp_fsf_req_send(req);
 	if (unlikely(retval))
 		goto failed_scsi_cmnd;
+	/* NOTE: DO NOT TOUCH req PAST THIS POINT! */
 
 	goto out;
 
@@ -2373,8 +2412,10 @@ struct zfcp_fsf_req *zfcp_fsf_fcp_task_mgmt(struct scsi_device *sdev,
 	zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags);
 
 	zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT);
-	if (!zfcp_fsf_req_send(req))
+	if (!zfcp_fsf_req_send(req)) {
+		/* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */
 		goto out;
+	}
 
 	zfcp_fsf_req_free(req);
 	req = NULL;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 75f66f8ad3ea..1b92f3c19ff3 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1523,10 +1523,10 @@ config SCSI_VIRTIO
 
 source "drivers/scsi/csiostor/Kconfig"
 
-endif # SCSI_LOWLEVEL
-
 source "drivers/scsi/pcmcia/Kconfig"
 
+endif # SCSI_LOWLEVEL
+
 source "drivers/scsi/device_handler/Kconfig"
 
 endmenu
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index aeda53901064..c00e3dd57990 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -185,7 +185,7 @@ zalon7xx-objs	:= zalon.o ncr53c8xx.o
 # Files generated that shall be removed upon make clean
 clean-files :=	53c700_d.h 53c700_u.h scsi_devinfo_tbl.c
 
-$(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
+$(obj)/53c700.o: $(obj)/53c700_d.h
 
 $(obj)/scsi_sysfs.o: $(obj)/scsi_devinfo_tbl.c
 
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index f0066f8a1786..4971104b1817 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -40,6 +40,7 @@
 #define ALUA_FAILOVER_TIMEOUT		60
 #define ALUA_FAILOVER_RETRIES		5
 #define ALUA_RTPG_DELAY_MSECS		5
+#define ALUA_RTPG_RETRY_DELAY		2
 
 /* device handler flags */
 #define ALUA_OPTIMIZE_STPG		0x01
@@ -682,7 +683,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 	case SCSI_ACCESS_STATE_TRANSITIONING:
 		if (time_before(jiffies, pg->expiry)) {
 			/* State transition, retry */
-			pg->interval = 2;
+			pg->interval = ALUA_RTPG_RETRY_DELAY;
 			err = SCSI_DH_RETRY;
 		} else {
 			struct alua_dh_data *h;
@@ -807,6 +808,8 @@ static void alua_rtpg_work(struct work_struct *work)
 				spin_lock_irqsave(&pg->lock, flags);
 				pg->flags &= ~ALUA_PG_RUNNING;
 				pg->flags |= ALUA_PG_RUN_RTPG;
+				if (!pg->interval)
+					pg->interval = ALUA_RTPG_RETRY_DELAY;
 				spin_unlock_irqrestore(&pg->lock, flags);
 				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
 						   pg->interval * HZ);
@@ -818,6 +821,8 @@ static void alua_rtpg_work(struct work_struct *work)
 		spin_lock_irqsave(&pg->lock, flags);
 		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
 			pg->flags &= ~ALUA_PG_RUNNING;
+			if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
+				pg->interval = ALUA_RTPG_RETRY_DELAY;
 			pg->flags |= ALUA_PG_RUN_RTPG;
 			spin_unlock_irqrestore(&pg->lock, flags);
 			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 590ec8009f52..1a85fe9e4b7b 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -1019,7 +1019,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 {
 	struct fcoe_fcf *fcf;
 	struct fcoe_fcf new;
-	unsigned long sol_tov = msecs_to_jiffies(FCOE_CTRL_SOL_TOV);
+	unsigned long sol_tov = msecs_to_jiffies(FCOE_CTLR_SOL_TOV);
 	int first = 0;
 	int mtu_valid;
 	int found = 0;
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ff0d8c6a8d0c..55522b7162d3 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -462,6 +462,9 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	else
 		shost->dma_boundary = 0xffffffff;
 
+	if (sht->virt_boundary_mask)
+		shost->virt_boundary_mask = sht->virt_boundary_mask;
+
 	device_initialize(&shost->shost_gendev);
 	dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
 	shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 43a6b5350775..eaf6177ac9ee 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -7798,7 +7798,7 @@ static void hpsa_free_pci_init(struct ctlr_info *h)
 	hpsa_disable_interrupt_mode(h);		/* pci_init 2 */
 	/*
 	 * call pci_disable_device before pci_release_regions per
-	 * Documentation/PCI/pci.rst
+	 * Documentation/driver-api/pci/pci.rst
 	 */
 	pci_disable_device(h->pdev);		/* pci_init 1 */
 	pci_release_regions(h->pdev);		/* pci_init 2 */
@@ -7881,7 +7881,7 @@ clean2:	/* intmode+region, pci */
 clean1:
 	/*
 	 * call pci_disable_device before pci_release_regions per
-	 * Documentation/PCI/pci.rst
+	 * Documentation/driver-api/pci/pci.rst
 	 */
 	pci_disable_device(h->pdev);
 	pci_release_regions(h->pdev);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index acd16e0d52cf..8cdbac076a1b 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -4864,8 +4864,8 @@ static int ibmvfc_remove(struct vio_dev *vdev)
 
 	spin_lock_irqsave(vhost->host->host_lock, flags);
 	ibmvfc_purge_requests(vhost, DID_ERROR);
-	ibmvfc_free_event_pool(vhost);
 	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+	ibmvfc_free_event_pool(vhost);
 
 	ibmvfc_free_mem(vhost);
 	spin_lock(&ibmvfc_driver_lock);
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index 8e1053bdd843..52e866659853 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -2591,7 +2591,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp)
 
 	/* lport lock ? */
 	if (!lport || lport->state == LPORT_ST_DISABLED) {
-		FC_LPORT_DBG(lport, "Receiving frames for an lport that "
+		FC_LIBFC_DBG("Receiving frames for an lport that "
 			     "has not been initialized correctly\n");
 		fc_frame_free(fp);
 		return;
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 4f339f939a51..bec83eb8ab87 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -414,7 +414,6 @@ static void sas_wait_eh(struct domain_device *dev)
 		goto retry;
 	}
 }
-EXPORT_SYMBOL(sas_wait_eh);
 
 static int sas_queue_reset(struct domain_device *dev, int reset_type,
 			   u64 lun, int wait)
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h
index 2322ddb085c0..34070874616d 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.h
+++ b/drivers/scsi/lpfc/lpfc_debugfs.h
@@ -330,7 +330,7 @@ enum {
  * This function dumps an entry indexed by @idx from a queue specified by the
  * queue descriptor @q.
  **/
-static inline void
+static void
 lpfc_debug_dump_qe(struct lpfc_queue *q, uint32_t idx)
 {
 	char line_buf[LPFC_LBUF_SZ];
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index ca724fe91b8d..a14e8344822b 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -21,8 +21,8 @@
 /*
  * MegaRAID SAS Driver meta data
  */
-#define MEGASAS_VERSION				"07.710.06.00-rc1"
-#define MEGASAS_RELDATE				"June 18, 2019"
+#define MEGASAS_VERSION				"07.710.50.00-rc1"
+#define MEGASAS_RELDATE				"June 28, 2019"
 
 /*
  * Device IDs
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 80ab9700f1de..f9f07935556e 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -105,6 +105,10 @@ MODULE_PARM_DESC(perf_mode, "Performance mode (only for Aero adapters), options:
 		"default mode is 'balanced'"
 		);
 
+int event_log_level = MFI_EVT_CLASS_CRITICAL;
+module_param(event_log_level, int, 0644);
+MODULE_PARM_DESC(event_log_level, "Asynchronous event logging level- range is: -2(CLASS_DEBUG) to 4(CLASS_DEAD), Default: 2(CLASS_CRITICAL)");
+
 MODULE_LICENSE("GPL");
 MODULE_VERSION(MEGASAS_VERSION);
 MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com");
@@ -280,7 +284,7 @@ void megasas_set_dma_settings(struct megasas_instance *instance,
 	}
 }
 
-void
+static void
 megasas_issue_dcmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
 {
 	instance->instancet->fire_cmd(instance,
@@ -404,7 +408,13 @@ megasas_decode_evt(struct megasas_instance *instance)
 	union megasas_evt_class_locale class_locale;
 	class_locale.word = le32_to_cpu(evt_detail->cl.word);
 
-	if (class_locale.members.class >= MFI_EVT_CLASS_CRITICAL)
+	if ((event_log_level < MFI_EVT_CLASS_DEBUG) ||
+	    (event_log_level > MFI_EVT_CLASS_DEAD)) {
+		printk(KERN_WARNING "megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n");
+		event_log_level = MFI_EVT_CLASS_CRITICAL;
+	}
+
+	if (class_locale.members.class >= event_log_level)
 		dev_info(&instance->pdev->dev, "%d (%s/0x%04x/%s) - %s\n",
 			le32_to_cpu(evt_detail->seq_num),
 			format_timestamp(le32_to_cpu(evt_detail->time_stamp)),
@@ -2237,7 +2247,7 @@ megasas_internal_reset_defer_cmds(struct megasas_instance *instance);
 static void
 process_fw_state_change_wq(struct work_struct *work);
 
-void megasas_do_ocr(struct megasas_instance *instance)
+static void megasas_do_ocr(struct megasas_instance *instance)
 {
 	if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) ||
 	(instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) ||
@@ -3153,6 +3163,7 @@ fw_crash_buffer_show(struct device *cdev,
 		(struct megasas_instance *) shost->hostdata;
 	u32 size;
 	unsigned long dmachunk = CRASH_DMA_BUF_SIZE;
+	unsigned long chunk_left_bytes;
 	unsigned long src_addr;
 	unsigned long flags;
 	u32 buff_offset;
@@ -3176,6 +3187,8 @@ fw_crash_buffer_show(struct device *cdev,
 	}
 
 	size = (instance->fw_crash_buffer_size * dmachunk) - buff_offset;
+	chunk_left_bytes = dmachunk - (buff_offset % dmachunk);
+	size = (size > chunk_left_bytes) ? chunk_left_bytes : size;
 	size = (size >= PAGE_SIZE) ? (PAGE_SIZE - 1) : size;
 
 	src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] +
@@ -3303,7 +3316,7 @@ static DEVICE_ATTR_RO(fw_cmds_outstanding);
 static DEVICE_ATTR_RO(dump_system_regs);
 static DEVICE_ATTR_RO(raid_map_id);
 
-struct device_attribute *megaraid_host_attrs[] = {
+static struct device_attribute *megaraid_host_attrs[] = {
 	&dev_attr_fw_crash_buffer_size,
 	&dev_attr_fw_crash_buffer,
 	&dev_attr_fw_crash_state,
@@ -3334,6 +3347,7 @@ static struct scsi_host_template megasas_template = {
 	.shost_attrs = megaraid_host_attrs,
 	.bios_param = megasas_bios_param,
 	.change_queue_depth = scsi_change_queue_depth,
+	.max_segment_size = 0xffffffff,
 	.no_write_same = 1,
 };
 
@@ -5933,7 +5947,8 @@ static int megasas_init_fw(struct megasas_instance *instance)
 					instance->is_rdpq = (scratch_pad_1 & MR_RDPQ_MODE_OFFSET) ?
 								1 : 0;
 
-				if (!instance->msix_combined) {
+				if (instance->adapter_type >= INVADER_SERIES &&
+				    !instance->msix_combined) {
 					instance->msix_load_balance = true;
 					instance->smp_affinity_enable = false;
 				}
@@ -6546,7 +6561,8 @@ megasas_get_target_prop(struct megasas_instance *instance,
 	int ret;
 	struct megasas_cmd *cmd;
 	struct megasas_dcmd_frame *dcmd;
-	u16 targetId = (sdev->channel % 2) + sdev->id;
+	u16 targetId = ((sdev->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) +
+			sdev->id;
 
 	cmd = megasas_get_cmd(instance);
 
@@ -8748,6 +8764,12 @@ static int __init megasas_init(void)
 		goto err_pcidrv;
 	}
 
+	if ((event_log_level < MFI_EVT_CLASS_DEBUG) ||
+	    (event_log_level > MFI_EVT_CLASS_DEAD)) {
+		pr_warn("megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n");
+		event_log_level = MFI_EVT_CLASS_CRITICAL;
+	}
+
 	rval = driver_create_file(&megasas_pci_driver.driver,
 				  &driver_attr_version);
 	if (rval)
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index a32b3f0fcd15..120e3c4de8c2 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -537,7 +537,7 @@ static int megasas_create_sg_sense_fusion(struct megasas_instance *instance)
 	return 0;
 }
 
-int
+static int
 megasas_alloc_cmdlist_fusion(struct megasas_instance *instance)
 {
 	u32 max_mpt_cmd, i, j;
@@ -576,7 +576,8 @@ megasas_alloc_cmdlist_fusion(struct megasas_instance *instance)
 
 	return 0;
 }
-int
+
+static int
 megasas_alloc_request_fusion(struct megasas_instance *instance)
 {
 	struct fusion_context *fusion;
@@ -657,7 +658,7 @@ retry_alloc:
 	return 0;
 }
 
-int
+static int
 megasas_alloc_reply_fusion(struct megasas_instance *instance)
 {
 	int i, count;
@@ -734,7 +735,7 @@ megasas_alloc_reply_fusion(struct megasas_instance *instance)
 	return 0;
 }
 
-int
+static int
 megasas_alloc_rdpq_fusion(struct megasas_instance *instance)
 {
 	int i, j, k, msix_count;
@@ -916,7 +917,7 @@ megasas_free_reply_fusion(struct megasas_instance *instance) {
  * and is used as SMID of the cmd.
  * SMID value range is from 1 to max_fw_cmds.
  */
-int
+static int
 megasas_alloc_cmds_fusion(struct megasas_instance *instance)
 {
 	int i;
@@ -1736,7 +1737,7 @@ static inline void megasas_free_ioc_init_cmd(struct megasas_instance *instance)
  *
  * This is the main function for initializing firmware.
  */
-u32
+static u32
 megasas_init_adapter_fusion(struct megasas_instance *instance)
 {
 	struct fusion_context *fusion;
@@ -1962,7 +1963,7 @@ megasas_fusion_stop_watchdog(struct megasas_instance *instance)
  * @ext_status :	ext status of cmd returned by FW
  */
 
-void
+static void
 map_cmd_status(struct fusion_context *fusion,
 		struct scsi_cmnd *scmd, u8 status, u8 ext_status,
 		u32 data_length, u8 *sense)
@@ -2375,7 +2376,7 @@ int megasas_make_sgl(struct megasas_instance *instance, struct scsi_cmnd *scp,
  *
  * Used to set the PD LBA in CDB for FP IOs
  */
-void
+static void
 megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
 		   struct IO_REQUEST_INFO *io_info, struct scsi_cmnd *scp,
 		   struct MR_DRV_RAID_MAP_ALL *local_map_ptr, u32 ref_tag)
@@ -2714,7 +2715,7 @@ megasas_set_raidflag_cpu_affinity(struct fusion_context *fusion,
  * Prepares the io_request and chain elements (sg_frame) for IO
  * The IO can be for PD (Fast Path) or LD
  */
-void
+static void
 megasas_build_ldio_fusion(struct megasas_instance *instance,
 			  struct scsi_cmnd *scp,
 			  struct megasas_cmd_fusion *cmd)
@@ -3211,7 +3212,7 @@ megasas_build_syspd_fusion(struct megasas_instance *instance,
  * Invokes helper functions to prepare request frames
  * and sets flags appropriate for IO/Non-IO cmd
  */
-int
+static int
 megasas_build_io_fusion(struct megasas_instance *instance,
 			struct scsi_cmnd *scp,
 			struct megasas_cmd_fusion *cmd)
@@ -3325,9 +3326,9 @@ megasas_get_request_descriptor(struct megasas_instance *instance, u16 index)
 /* megasas_prepate_secondRaid1_IO
  *  It prepares the raid 1 second IO
  */
-void megasas_prepare_secondRaid1_IO(struct megasas_instance *instance,
-			    struct megasas_cmd_fusion *cmd,
-			    struct megasas_cmd_fusion *r1_cmd)
+static void megasas_prepare_secondRaid1_IO(struct megasas_instance *instance,
+					   struct megasas_cmd_fusion *cmd,
+					   struct megasas_cmd_fusion *r1_cmd)
 {
 	union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc, *req_desc2 = NULL;
 	struct fusion_context *fusion;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 27c731a3fb49..717ba0845a2a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -10238,6 +10238,7 @@ static struct scsi_host_template mpt3sas_driver_template = {
 	.this_id			= -1,
 	.sg_tablesize			= MPT3SAS_SG_DEPTH,
 	.max_sectors			= 32767,
+	.max_segment_size		= 0xffffffff,
 	.cmd_per_lun			= 7,
 	.shost_attrs			= mpt3sas_host_attrs,
 	.sdev_attrs			= mpt3sas_dev_attrs,
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index dd38c356a1a4..9453705f643a 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -888,6 +888,8 @@ static void pm8001_dev_gone_notify(struct domain_device *dev)
 			spin_unlock_irqrestore(&pm8001_ha->lock, flags);
 			pm8001_exec_internal_task_abort(pm8001_ha, pm8001_dev ,
 				dev, 1, 0);
+			while (pm8001_dev->running_req)
+				msleep(20);
 			spin_lock_irqsave(&pm8001_ha->lock, flags);
 		}
 		PM8001_CHIP_DISP->dereg_dev_req(pm8001_ha, device_id);
@@ -1256,8 +1258,10 @@ int pm8001_abort_task(struct sas_task *task)
 			PM8001_MSG_DBG(pm8001_ha,
 				pm8001_printk("Waiting for Port reset\n"));
 			wait_for_completion(&completion_reset);
-			if (phy->port_reset_status)
+			if (phy->port_reset_status) {
+				pm8001_dev_gone_notify(dev);
 				goto out;
+			}
 
 			/*
 			 * 4. SATA Abort ALL
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 1128d86d241a..73261902d75d 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -604,7 +604,7 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha)
 		pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer &=
 					0x0000ffff;
 		pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer |=
-					0x140000;
+					CHIP_8006_PORT_RECOVERY_TIMEOUT;
 	}
 	pm8001_mw32(address, MAIN_PORT_RECOVERY_TIMER,
 			pm8001_ha->main_cfg_tbl.pm80xx_tbl.port_recovery_timer);
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
index 84d7426441bf..dc9ab7689060 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.h
+++ b/drivers/scsi/pm8001/pm80xx_hwi.h
@@ -230,6 +230,8 @@
 #define SAS_MAX_AIP                     0x200000
 #define IT_NEXUS_TIMEOUT       0x7D0
 #define PORT_RECOVERY_TIMEOUT  ((IT_NEXUS_TIMEOUT/100) + 30)
+/* Port recovery timeout, 10000 ms for PM8006 controller */
+#define CHIP_8006_PORT_RECOVERY_TIMEOUT 0x640000
 
 #ifdef __LITTLE_ENDIAN_BITFIELD
 struct sas_identify_frame_local {
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index a08ff3bd6310..df14597752ec 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -239,6 +239,8 @@ static struct {
 	{"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
 	{"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
 	{"LENOVO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+	{"SanDisk", "Cruzer Blade", NULL, BLIST_TRY_VPD_PAGES |
+		BLIST_INQUIRY_36},
 	{"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
 	{"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
 	{"SONY", "TSL", NULL, BLIST_FORCELUN},		/* DDS3 & DDS4 autoloaders */
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e1da8c70a266..11e64b50497f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -84,11 +84,11 @@ int scsi_init_sense_cache(struct Scsi_Host *shost)
 	struct kmem_cache *cache;
 	int ret = 0;
 
+	mutex_lock(&scsi_sense_cache_mutex);
 	cache = scsi_select_sense_cache(shost->unchecked_isa_dma);
 	if (cache)
-		return 0;
+		goto exit;
 
-	mutex_lock(&scsi_sense_cache_mutex);
 	if (shost->unchecked_isa_dma) {
 		scsi_sense_isadma_cache =
 			kmem_cache_create("scsi_sense_cache(DMA)",
@@ -104,7 +104,7 @@ int scsi_init_sense_cache(struct Scsi_Host *shost)
 		if (!scsi_sense_cache)
 			ret = -ENOMEM;
 	}
-
+ exit:
 	mutex_unlock(&scsi_sense_cache_mutex);
 	return ret;
 }
@@ -1452,7 +1452,7 @@ static void scsi_softirq_done(struct request *rq)
 	disposition = scsi_decide_disposition(cmd);
 	if (disposition != SUCCESS &&
 	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
-		sdev_printk(KERN_ERR, cmd->device,
+		scmd_printk(KERN_ERR, cmd,
 			    "timing out command, waited %lus\n",
 			    wait_for/HZ);
 		disposition = SUCCESS;
@@ -1784,6 +1784,10 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 		blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
 	}
 
+	if (dev->dma_mask) {
+		shost->max_sectors = min_t(unsigned int, shost->max_sectors,
+				dma_max_mapping_size(dev) >> SECTOR_SHIFT);
+	}
 	blk_queue_max_hw_sectors(q, shost->max_sectors);
 	if (shost->unchecked_isa_dma)
 		blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
@@ -1791,7 +1795,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 	dma_set_seg_boundary(dev, shost->dma_boundary);
 
 	blk_queue_max_segment_size(q, shost->max_segment_size);
-	dma_set_max_seg_size(dev, shost->max_segment_size);
+	blk_queue_virt_boundary(q, shost->virt_boundary_mask);
+	dma_set_max_seg_size(dev, queue_max_segment_size(q));
 
 	/*
 	 * Set a reasonable default alignment:  The larger of 32-byte (dword),
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index db16c19e05c4..5d6ff3931632 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -461,7 +461,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 {
 	struct gendisk *disk = sdkp->disk;
 	unsigned int nr_zones;
-	u32 zone_blocks;
+	u32 zone_blocks = 0;
 	int ret;
 
 	if (!sd_is_zoned(sdkp))
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index c2b6a0ca6933..ed8b9ac805e6 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1423,9 +1423,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
 {
 	blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
 
-	/* Ensure there are no gaps in presented sgls */
-	blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1);
-
 	sdevice->no_write_same = 1;
 
 	/*
@@ -1698,6 +1695,8 @@ static struct scsi_host_template scsi_driver = {
 	.this_id =		-1,
 	/* Make sure we dont get a sg segment crosses a page boundary */
 	.dma_boundary =		PAGE_SIZE-1,
+	/* Ensure there are no gaps in presented sgls */
+	.virt_boundary_mask =	PAGE_SIZE-1,
 	.no_write_same =	1,
 	.track_queue_depth =	1,
 	.change_queue_depth =	storvsc_change_queue_depth,
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 04d3686511c8..e274053109d0 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4587,8 +4587,6 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
 	struct request_queue *q = sdev->request_queue;
 
 	blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
-	blk_queue_max_segment_size(q, PRDT_DATA_BYTE_COUNT_MAX);
-
 	return 0;
 }
 
@@ -7022,6 +7020,7 @@ static struct scsi_host_template ufshcd_driver_template = {
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
 	.can_queue		= UFSHCD_CAN_QUEUE,
+	.max_segment_size	= PRDT_DATA_BYTE_COUNT_MAX,
 	.max_host_blocked	= 1,
 	.track_queue_depth	= 1,
 	.sdev_groups		= ufshcd_driver_groups,
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 62c6ba17991a..c9519e62308c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -419,7 +419,7 @@ static void qe_upload_microcode(const void *base,
 /*
  * Upload a microcode to the I-RAM at a specific address.
  *
- * See Documentation/powerpc/qe_firmware.txt for information on QE microcode
+ * See Documentation/powerpc/qe_firmware.rst for information on QE microcode
  * uploading.
  *
  * Currently, only version 1 is supported, so the 'version' field must be
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
index 22dd4c457d6a..c70caf4ea490 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
@@ -875,10 +875,12 @@ static u8 cxgbit_get_iscsi_dcb_priority(struct net_device *ndev, u16 local_port)
 		return 0;
 
 	if (caps & DCB_CAP_DCBX_VER_IEEE) {
-		iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_ANY;
-
+		iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_STREAM;
 		ret = dcb_ieee_getapp_mask(ndev, &iscsi_dcb_app);
-
+		if (!ret) {
+			iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_ANY;
+			ret = dcb_ieee_getapp_mask(ndev, &iscsi_dcb_app);
+		}
 	} else if (caps & DCB_CAP_DCBX_VER_CEE) {
 		iscsi_dcb_app.selector = DCB_APP_IDTYPE_PORTNUM;
 
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c
index 343b129c2cfa..e877b917c15f 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_main.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c
@@ -589,7 +589,8 @@ static void cxgbit_dcb_workfn(struct work_struct *work)
 	iscsi_app = &dcb_work->dcb_app;
 
 	if (iscsi_app->dcbx & DCB_CAP_DCBX_VER_IEEE) {
-		if (iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_ANY)
+		if ((iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_STREAM) &&
+		    (iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_ANY))
 			goto out;
 
 		priority = iscsi_app->app.priority;
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
index 213ab3cc6b80..d3446acf9bbd 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
@@ -487,6 +487,7 @@ static int proc_thermal_rapl_add(struct pci_dev *pdev,
 				rapl_mmio_cpu_online, rapl_mmio_cpu_down_prep);
 	if (ret < 0) {
 		powercap_unregister_control_type(rapl_mmio_priv.control_type);
+		rapl_mmio_priv.control_type = NULL;
 		return ret;
 	}
 	rapl_mmio_priv.pcap_rapl_online = ret;
@@ -496,6 +497,9 @@ static int proc_thermal_rapl_add(struct pci_dev *pdev,
 
 static void proc_thermal_rapl_remove(void)
 {
+	if (IS_ERR_OR_NULL(rapl_mmio_priv.control_type))
+		return;
+
 	cpuhp_remove_state(rapl_mmio_priv.pcap_rapl_online);
 	powercap_unregister_control_type(rapl_mmio_priv.control_type);
 }
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index cb4db1b3ca3c..5fb214e67d73 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -47,7 +47,7 @@
  * using the 2.6 Linux kernel kref construct.
  *
  * For direction on installation and usage of this driver please reference
- * Documentation/powerpc/hvcs.txt.
+ * Documentation/powerpc/hvcs.rst.
  */
 
 #include <linux/device.h>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4707dfff991b..c2a85b587922 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -345,15 +345,24 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	struct bio *bio;
 	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+	bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
 	loff_t pos = iocb->ki_pos;
 	blk_qc_t qc = BLK_QC_T_NONE;
-	int ret = 0;
+	gfp_t gfp;
+	ssize_t ret;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
+	if (nowait)
+		gfp = GFP_NOWAIT;
+	else
+		gfp = GFP_KERNEL;
+
+	bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
+	if (!bio)
+		return -EAGAIN;
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -375,7 +384,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	if (!is_poll)
 		blk_start_plug(&plug);
 
+	ret = 0;
 	for (;;) {
+		int err;
+
 		bio_set_dev(bio, bdev);
 		bio->bi_iter.bi_sector = pos >> 9;
 		bio->bi_write_hint = iocb->ki_hint;
@@ -383,8 +395,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		bio->bi_end_io = blkdev_bio_end_io;
 		bio->bi_ioprio = iocb->ki_ioprio;
 
-		ret = bio_iov_iter_get_pages(bio, iter);
-		if (unlikely(ret)) {
+		err = bio_iov_iter_get_pages(bio, iter);
+		if (unlikely(err)) {
+			if (!ret)
+				ret = err;
 			bio->bi_status = BLK_STS_IOERR;
 			bio_endio(bio);
 			break;
@@ -399,6 +413,14 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			task_io_account_write(bio->bi_iter.bi_size);
 		}
 
+		/*
+		 * Tell underlying layer to not block for resource shortage.
+		 * And if we would have blocked, return error inline instead
+		 * of through the bio->bi_end_io() callback.
+		 */
+		if (nowait)
+			bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
+
 		dio->size += bio->bi_iter.bi_size;
 		pos += bio->bi_iter.bi_size;
 
@@ -412,6 +434,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			}
 
 			qc = submit_bio(bio);
+			if (qc == BLK_QC_T_EAGAIN) {
+				if (!ret)
+					ret = -EAGAIN;
+				goto error;
+			}
 
 			if (polled)
 				WRITE_ONCE(iocb->ki_cookie, qc);
@@ -432,8 +459,20 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			atomic_inc(&dio->ref);
 		}
 
-		submit_bio(bio);
-		bio = bio_alloc(GFP_KERNEL, nr_pages);
+		qc = submit_bio(bio);
+		if (qc == BLK_QC_T_EAGAIN) {
+			if (!ret)
+				ret = -EAGAIN;
+			goto error;
+		}
+		ret += bio->bi_iter.bi_size;
+
+		bio = bio_alloc(gfp, nr_pages);
+		if (!bio) {
+			if (!ret)
+				ret = -EAGAIN;
+			goto error;
+		}
 	}
 
 	if (!is_poll)
@@ -453,13 +492,16 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	}
 	__set_current_state(TASK_RUNNING);
 
+out:
 	if (!ret)
 		ret = blk_status_to_errno(dio->bio.bi_status);
-	if (likely(!ret))
-		ret = dio->size;
 
 	bio_put(&dio->bio);
 	return ret;
+error:
+	if (!is_poll)
+		blk_finish_plug(&plug);
+	goto out;
 }
 
 static ssize_t
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 212b4a854f2c..38651fae7f21 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,7 @@ config BTRFS_FS
 	tristate "Btrfs filesystem support"
 	select CRYPTO
 	select CRYPTO_CRC32C
+	select LIBCRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	select LZO_COMPRESS
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41a2bd2e0c56..5f7ee70b3d1a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4106,6 +4106,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 	percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
 	cleanup_srcu_struct(&fs_info->subvol_srcu);
 
+	btrfs_free_csum_hash(fs_info);
 	btrfs_free_stripe_hash_table(fs_info);
 	btrfs_free_ref_cache(fs_info);
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1af069a9a0c7..ee582a36653d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow,
 	return 0;
 }
 
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool inode_can_compress(struct inode *inode)
+{
+	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
+	    BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+		return false;
+	return true;
+}
+
+/*
+ * Check if the inode needs to be submitted to compression, based on mount
+ * options, defragmentation, properties or heuristics.
+ */
 static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 
+	if (!inode_can_compress(inode)) {
+		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+			KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
+			btrfs_ino(BTRFS_I(inode)));
+		return 0;
+	}
 	/* force compress */
 	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
 		return 1;
@@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
 	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
 		ret = run_delalloc_nocow(inode, locked_page, start, end,
 					 page_started, 0, nr_written);
-	} else if (!inode_need_compress(inode, start, end)) {
+	} else if (!inode_can_compress(inode) ||
+		   !inode_need_compress(inode, start, end)) {
 		ret = cow_file_range(inode, locked_page, start, end, end,
 				      page_started, nr_written, 1, NULL);
 	} else {
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 98fccce4208c..393eceda57c8 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -346,9 +346,12 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
 	if (blockers) {
 		btrfs_assert_no_spinning_writers(eb);
 		eb->blocking_writers--;
-		/* Use the lighter barrier after atomic */
-		smp_mb__after_atomic();
-		cond_wake_up_nomb(&eb->write_lock_wq);
+		/*
+		 * We need to order modifying blocking_writers above with
+		 * actually waking up the sleepers to ensure they see the
+		 * updated value of blocking_writers
+		 */
+		cond_wake_up(&eb->write_lock_wq);
 	} else {
 		btrfs_assert_spinning_writers_put(eb);
 		write_unlock(&eb->lock);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 1744ba8b2754..ae7f64a8facb 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -985,13 +985,14 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
 					struct extent_state **cached_state)
 {
 	struct btrfs_ordered_extent *ordered;
-	struct extent_state *cachedp = NULL;
+	struct extent_state *cache = NULL;
+	struct extent_state **cachedp = &cache;
 
 	if (cached_state)
-		cachedp = *cached_state;
+		cachedp = cached_state;
 
 	while (1) {
-		lock_extent_bits(tree, start, end, &cachedp);
+		lock_extent_bits(tree, start, end, cachedp);
 		ordered = btrfs_lookup_ordered_range(inode, start,
 						     end - start + 1);
 		if (!ordered) {
@@ -1001,10 +1002,10 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
 			 * aren't exposing it outside of this function
 			 */
 			if (!cached_state)
-				refcount_dec(&cachedp->refs);
+				refcount_dec(&cache->refs);
 			break;
 		}
-		unlock_extent_cached(tree, start, end, &cachedp);
+		unlock_extent_cached(tree, start, end, cachedp);
 		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
 		btrfs_put_ordered_extent(ordered);
 	}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a13ddba1ebc3..d74b74ca07af 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5941,6 +5941,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	u64 stripe_len;
 	u64 raid56_full_stripe_start = (u64)-1;
 	int data_stripes;
+	int ret = 0;
 
 	ASSERT(op != BTRFS_MAP_DISCARD);
 
@@ -5961,8 +5962,8 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		btrfs_crit(fs_info,
 "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
 			stripe_offset, offset, em->start, logical, stripe_len);
-		free_extent_map(em);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	/* stripe_offset is the offset of this block in its stripe */
@@ -6009,7 +6010,10 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	io_geom->stripe_offset = stripe_offset;
 	io_geom->raid56_stripe_offset = raid56_full_stripe_start;
 
-	return 0;
+out:
+	/* once for us */
+	free_extent_map(em);
+	return ret;
 }
 
 static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index aab29f48c62d..4ca0b8ff9a72 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1267,7 +1267,7 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc,
 		if (!spin_trylock(&dentry->d_lock))
 			continue;
 
-		if (dentry->d_lockref.count < 0) {
+		if (__lockref_is_dead(&dentry->d_lockref)) {
 			list_del_init(&di->lease_list);
 			goto next;
 		}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 270d3c58fb3b..3289b566463f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1104,6 +1104,10 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
 		goto out;
 	}
 
+	rc = -EOPNOTSUPP;
+	if (!target_tcon->ses->server->ops->copychunk_range)
+		goto out;
+
 	/*
 	 * Note: cifs case is easier than btrfs since server responsible for
 	 * checks for proper open modes and file type and if it wants
@@ -1115,11 +1119,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
 	/* should we flush first and last page first */
 	truncate_inode_pages(&target_inode->i_data, 0);
 
-	if (target_tcon->ses->server->ops->copychunk_range)
+	rc = file_modified(dst_file);
+	if (!rc)
 		rc = target_tcon->ses->server->ops->copychunk_range(xid,
 			smb_file_src, smb_file_target, off, len, destoff);
-	else
-		rc = -EOPNOTSUPP;
+
+	file_accessed(src_file);
 
 	/* force revalidate of size and timestamps of target file now
 	 * that target is updated on the server
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index aea005703785..4b21a90015a9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,5 +152,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.20"
+#define CIFS_VERSION   "2.21"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1bffe029fb66..56ca4b8ccaba 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2406,6 +2406,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 	struct inode *inode = d_inode(direntry);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 	struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+	struct cifsFileInfo *wfile;
+	struct cifs_tcon *tcon;
 	char *full_path = NULL;
 	int rc = -EACCES;
 	__u32 dosattr = 0;
@@ -2452,6 +2454,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 	mapping_set_error(inode->i_mapping, rc);
 	rc = 0;
 
+	if (attrs->ia_valid & ATTR_MTIME) {
+		rc = cifs_get_writable_file(cifsInode, false, &wfile);
+		if (!rc) {
+			tcon = tlink_tcon(wfile->tlink);
+			rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid);
+			cifsFileInfo_put(wfile);
+			if (rc)
+				return rc;
+		} else if (rc != -EBADF)
+			return rc;
+		else
+			rc = 0;
+	}
+
 	if (attrs->ia_valid & ATTR_SIZE) {
 		rc = cifs_set_file_size(inode, attrs, xid, full_path);
 		if (rc != 0)
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 54bffb2a1786..e6a1fc72018f 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -88,14 +88,20 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
 	}
 
 	if (buf) {
-		/* open response does not have IndexNumber field - get it */
-		rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid,
+		/* if open response does not have IndexNumber field - get it */
+		if (smb2_data->IndexNumber == 0) {
+			rc = SMB2_get_srv_num(xid, oparms->tcon,
+				      fid->persistent_fid,
 				      fid->volatile_fid,
 				      &smb2_data->IndexNumber);
-		if (rc) {
-			/* let get_inode_info disable server inode numbers */
-			smb2_data->IndexNumber = 0;
-			rc = 0;
+			if (rc) {
+				/*
+				 * let get_inode_info disable server inode
+				 * numbers
+				 */
+				smb2_data->IndexNumber = 0;
+				rc = 0;
+			}
 		}
 		move_smb2_info_to_cifs(buf, smb2_data);
 	}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0cdc4e47ca87..a5bc1b671c12 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -694,8 +694,51 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
 
 	smb2_set_related(&rqst[1]);
 
+	/*
+	 * We do not hold the lock for the open because in case
+	 * SMB2_open needs to reconnect, it will end up calling
+	 * cifs_mark_open_files_invalid() which takes the lock again
+	 * thus causing a deadlock
+	 */
+
+	mutex_unlock(&tcon->crfid.fid_mutex);
 	rc = compound_send_recv(xid, ses, flags, 2, rqst,
 				resp_buftype, rsp_iov);
+	mutex_lock(&tcon->crfid.fid_mutex);
+
+	/*
+	 * Now we need to check again as the cached root might have
+	 * been successfully re-opened from a concurrent process
+	 */
+
+	if (tcon->crfid.is_valid) {
+		/* work was already done */
+
+		/* stash fids for close() later */
+		struct cifs_fid fid = {
+			.persistent_fid = pfid->persistent_fid,
+			.volatile_fid = pfid->volatile_fid,
+		};
+
+		/*
+		 * caller expects this func to set pfid to a valid
+		 * cached root, so we copy the existing one and get a
+		 * reference.
+		 */
+		memcpy(pfid, tcon->crfid.fid, sizeof(*pfid));
+		kref_get(&tcon->crfid.refcount);
+
+		mutex_unlock(&tcon->crfid.fid_mutex);
+
+		if (rc == 0) {
+			/* close extra handle outside of crit sec */
+			SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+		}
+		goto oshr_free;
+	}
+
+	/* Cached root is still invalid, continue normally */
+
 	if (rc)
 		goto oshr_exit;
 
@@ -711,11 +754,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
 	tcon->crfid.is_valid = true;
 	kref_init(&tcon->crfid.refcount);
 
+	/* BB TBD check to see if oplock level check can be removed below */
 	if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
 		kref_get(&tcon->crfid.refcount);
-		oplock = smb2_parse_lease_state(server, o_rsp,
-						&oparms.fid->epoch,
-						oparms.fid->lease_key);
+		smb2_parse_contexts(server, o_rsp,
+				&oparms.fid->epoch,
+				oparms.fid->lease_key, &oplock, NULL);
 	} else
 		goto oshr_exit;
 
@@ -729,8 +773,9 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
 				(char *)&tcon->crfid.file_all_info))
 		tcon->crfid.file_all_info_is_valid = 1;
 
- oshr_exit:
+oshr_exit:
 	mutex_unlock(&tcon->crfid.fid_mutex);
+oshr_free:
 	SMB2_open_free(&rqst[0]);
 	SMB2_query_info_free(&rqst[1]);
 	free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index f58e4dc3987b..c8cd7b6cdda2 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1873,10 +1873,21 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
 	return buf;
 }
 
-__u8
-smb2_parse_lease_state(struct TCP_Server_Info *server,
+static void
+parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf)
+{
+	struct create_on_disk_id *pdisk_id = (struct create_on_disk_id *)cc;
+
+	cifs_dbg(FYI, "parse query id context 0x%llx 0x%llx\n",
+		pdisk_id->DiskFileId, pdisk_id->VolumeId);
+	buf->IndexNumber = pdisk_id->DiskFileId;
+}
+
+void
+smb2_parse_contexts(struct TCP_Server_Info *server,
 		       struct smb2_create_rsp *rsp,
-		       unsigned int *epoch, char *lease_key)
+		       unsigned int *epoch, char *lease_key, __u8 *oplock,
+		       struct smb2_file_all_info *buf)
 {
 	char *data_offset;
 	struct create_context *cc;
@@ -1884,15 +1895,24 @@ smb2_parse_lease_state(struct TCP_Server_Info *server,
 	unsigned int remaining;
 	char *name;
 
+	*oplock = 0;
 	data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
 	remaining = le32_to_cpu(rsp->CreateContextsLength);
 	cc = (struct create_context *)data_offset;
+
+	/* Initialize inode number to 0 in case no valid data in qfid context */
+	if (buf)
+		buf->IndexNumber = 0;
+
 	while (remaining >= sizeof(struct create_context)) {
 		name = le16_to_cpu(cc->NameOffset) + (char *)cc;
 		if (le16_to_cpu(cc->NameLength) == 4 &&
-		    strncmp(name, "RqLs", 4) == 0)
-			return server->ops->parse_lease_buf(cc, epoch,
-							    lease_key);
+		    strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0)
+			*oplock = server->ops->parse_lease_buf(cc, epoch,
+							   lease_key);
+		else if (buf && (le16_to_cpu(cc->NameLength) == 4) &&
+		    strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0)
+			parse_query_id_ctxt(cc, buf);
 
 		next = le32_to_cpu(cc->Next);
 		if (!next)
@@ -1901,7 +1921,10 @@ smb2_parse_lease_state(struct TCP_Server_Info *server,
 		cc = (struct create_context *)((char *)cc + next);
 	}
 
-	return 0;
+	if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE)
+		*oplock = rsp->OplockLevel;
+
+	return;
 }
 
 static int
@@ -2588,12 +2611,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 		buf->DeletePending = 0;
 	}
 
-	if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
-		*oplock = smb2_parse_lease_state(server, rsp,
-						 &oparms->fid->epoch,
-						 oparms->fid->lease_key);
-	else
-		*oplock = rsp->OplockLevel;
+
+	smb2_parse_contexts(server, rsp, &oparms->fid->epoch,
+			    oparms->fid->lease_key, oplock, buf);
 creat_exit:
 	SMB2_open_free(&rqst);
 	free_rsp_buf(resp_buftype, rsp);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 7e2e782f8edd..747de9317659 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -818,7 +818,9 @@ struct durable_reconnect_context_v2 {
 } __packed;
 
 /* See MS-SMB2 2.2.14.2.9 */
-struct on_disk_id {
+struct create_on_disk_id {
+	struct create_context ccontext;
+	__u8   Name[8];
 	__le64 DiskFileId;
 	__le64 VolumeId;
 	__u32  Reserved[4];
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 52df125e9189..07ca72486cfa 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -228,9 +228,10 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
 
 extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
 					enum securityEnum);
-extern __u8 smb2_parse_lease_state(struct TCP_Server_Info *server,
-				   struct smb2_create_rsp *rsp,
-				   unsigned int *epoch, char *lease_key);
+extern void smb2_parse_contexts(struct TCP_Server_Info *server,
+				struct smb2_create_rsp *rsp,
+				unsigned int *epoch, char *lease_key,
+				__u8 *oplock, struct smb2_file_all_info *buf);
 extern int smb3_encryption_required(const struct cifs_tcon *tcon);
 extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
 			     struct kvec *iov, unsigned int min_buf_size);
diff --git a/fs/dcache.c b/fs/dcache.c
index f41121e5d1ec..e88cf0554e65 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -861,6 +861,32 @@ void dput(struct dentry *dentry)
 }
 EXPORT_SYMBOL(dput);
 
+static void __dput_to_list(struct dentry *dentry, struct list_head *list)
+__must_hold(&dentry->d_lock)
+{
+	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+		/* let the owner of the list it's on deal with it */
+		--dentry->d_lockref.count;
+	} else {
+		if (dentry->d_flags & DCACHE_LRU_LIST)
+			d_lru_del(dentry);
+		if (!--dentry->d_lockref.count)
+			d_shrink_add(dentry, list);
+	}
+}
+
+void dput_to_list(struct dentry *dentry, struct list_head *list)
+{
+	rcu_read_lock();
+	if (likely(fast_dput(dentry))) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+	if (!retain_dentry(dentry))
+		__dput_to_list(dentry, list);
+	spin_unlock(&dentry->d_lock);
+}
 
 /* This must be called with d_lock held */
 static inline void __dget_dlock(struct dentry *dentry)
@@ -1067,7 +1093,7 @@ out:
 	return false;
 }
 
-static void shrink_dentry_list(struct list_head *list)
+void shrink_dentry_list(struct list_head *list)
 {
 	while (!list_empty(list)) {
 		struct dentry *dentry, *parent;
@@ -1089,18 +1115,9 @@ static void shrink_dentry_list(struct list_head *list)
 		rcu_read_unlock();
 		d_shrink_del(dentry);
 		parent = dentry->d_parent;
+		if (parent != dentry)
+			__dput_to_list(parent, list);
 		__dentry_kill(dentry);
-		if (parent == dentry)
-			continue;
-		/*
-		 * We need to prune ancestors too. This is necessary to prevent
-		 * quadratic behavior of shrink_dcache_parent(), but is also
-		 * expected to be beneficial in reducing dentry cache
-		 * fragmentation.
-		 */
-		dentry = parent;
-		while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
-			dentry = dentry_kill(dentry);
 	}
 }
 
@@ -1445,8 +1462,11 @@ out:
 
 struct select_data {
 	struct dentry *start;
+	union {
+		long found;
+		struct dentry *victim;
+	};
 	struct list_head dispose;
-	int found;
 };
 
 static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
@@ -1478,6 +1498,37 @@ out:
 	return ret;
 }
 
+static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
+{
+	struct select_data *data = _data;
+	enum d_walk_ret ret = D_WALK_CONTINUE;
+
+	if (data->start == dentry)
+		goto out;
+
+	if (dentry->d_flags & DCACHE_SHRINK_LIST) {
+		if (!dentry->d_lockref.count) {
+			rcu_read_lock();
+			data->victim = dentry;
+			return D_WALK_QUIT;
+		}
+	} else {
+		if (dentry->d_flags & DCACHE_LRU_LIST)
+			d_lru_del(dentry);
+		if (!dentry->d_lockref.count)
+			d_shrink_add(dentry, &data->dispose);
+	}
+	/*
+	 * We can return to the caller if we have found some (this
+	 * ensures forward progress). We'll be coming back to find
+	 * the rest.
+	 */
+	if (!list_empty(&data->dispose))
+		ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
+out:
+	return ret;
+}
+
 /**
  * shrink_dcache_parent - prune dcache
  * @parent: parent of entries to prune
@@ -1487,12 +1538,9 @@ out:
 void shrink_dcache_parent(struct dentry *parent)
 {
 	for (;;) {
-		struct select_data data;
+		struct select_data data = {.start = parent};
 
 		INIT_LIST_HEAD(&data.dispose);
-		data.start = parent;
-		data.found = 0;
-
 		d_walk(parent, &data, select_collect);
 
 		if (!list_empty(&data.dispose)) {
@@ -1503,6 +1551,24 @@ void shrink_dcache_parent(struct dentry *parent)
 		cond_resched();
 		if (!data.found)
 			break;
+		data.victim = NULL;
+		d_walk(parent, &data, select_collect2);
+		if (data.victim) {
+			struct dentry *parent;
+			spin_lock(&data.victim->d_lock);
+			if (!shrink_lock_dentry(data.victim)) {
+				spin_unlock(&data.victim->d_lock);
+				rcu_read_unlock();
+			} else {
+				rcu_read_unlock();
+				parent = data.victim->d_parent;
+				if (parent != data.victim)
+					__dput_to_list(parent, &data.dispose);
+				__dentry_kill(data.victim);
+			}
+		}
+		if (!list_empty(&data.dispose))
+			shrink_dentry_list(&data.dispose);
 	}
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index a6497cf8ae53..47ef3c71ce90 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -19,20 +19,14 @@ void pin_remove(struct fs_pin *pin)
 	spin_unlock_irq(&pin->wait.lock);
 }
 
-void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
+void pin_insert(struct fs_pin *pin, struct vfsmount *m)
 {
 	spin_lock(&pin_lock);
-	if (p)
-		hlist_add_head(&pin->s_list, p);
+	hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
 	hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
 	spin_unlock(&pin_lock);
 }
 
-void pin_insert(struct fs_pin *pin, struct vfsmount *m)
-{
-	pin_insert_group(pin, m, &m->mnt_sb->s_pins);
-}
-
 void pin_kill(struct fs_pin *p)
 {
 	wait_queue_entry_t wait;
diff --git a/fs/internal.h b/fs/internal.h
index ff5173212803..315fcd8d237c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -157,6 +157,8 @@ extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
 extern struct dentry *d_alloc_cursor(struct dentry *);
 extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
 extern char *simple_dname(struct dentry *, char *, int);
+extern void dput_to_list(struct dentry *, struct list_head *);
+extern void shrink_dentry_list(struct list_head *);
 
 /*
  * read_write.c
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e2a66e12fbc6..012bc0efb9d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -202,7 +202,7 @@ struct async_list {
 
 	struct file		*file;
 	off_t			io_end;
-	size_t			io_pages;
+	size_t			io_len;
 };
 
 struct io_ring_ctx {
@@ -333,7 +333,8 @@ struct io_kiocb {
 #define REQ_F_IO_DRAIN		16	/* drain existing IO first */
 #define REQ_F_IO_DRAINED	32	/* drain done */
 #define REQ_F_LINK		64	/* linked sqes */
-#define REQ_F_FAIL_LINK		128	/* fail rest of links */
+#define REQ_F_LINK_DONE		128	/* linked sqes done */
+#define REQ_F_FAIL_LINK		256	/* fail rest of links */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
 	if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
 		return false;
 
-	return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped;
+	return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
 }
 
 static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
@@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req)
 			nxt->flags |= REQ_F_LINK;
 		}
 
+		nxt->flags |= REQ_F_LINK_DONE;
 		INIT_WORK(&nxt->work, io_sq_wq_submit_work);
 		queue_work(req->ctx->sqo_wq, &nxt->work);
 	}
@@ -1064,8 +1066,44 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	 */
 	offset = buf_addr - imu->ubuf;
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
-	if (offset)
-		iov_iter_advance(iter, offset);
+
+	if (offset) {
+		/*
+		 * Don't use iov_iter_advance() here, as it's really slow for
+		 * using the latter parts of a big fixed buffer - it iterates
+		 * over each segment manually. We can cheat a bit here, because
+		 * we know that:
+		 *
+		 * 1) it's a BVEC iter, we set it up
+		 * 2) all bvecs are PAGE_SIZE in size, except potentially the
+		 *    first and last bvec
+		 *
+		 * So just find our index, and adjust the iterator afterwards.
+		 * If the offset is within the first bvec (or the whole first
+		 * bvec, just use iov_iter_advance(). This makes it easier
+		 * since we can just skip the first segment, which may not
+		 * be PAGE_SIZE aligned.
+		 */
+		const struct bio_vec *bvec = imu->bvec;
+
+		if (offset <= bvec->bv_len) {
+			iov_iter_advance(iter, offset);
+		} else {
+			unsigned long seg_skip;
+
+			/* skip first vec */
+			offset -= bvec->bv_len;
+			seg_skip = 1 + (offset >> PAGE_SHIFT);
+
+			iter->bvec = bvec + seg_skip;
+			iter->nr_segs -= seg_skip;
+			iter->count -= (seg_skip << PAGE_SHIFT);
+			iter->iov_offset = offset & ~PAGE_MASK;
+			if (iter->iov_offset)
+				iter->count -= iter->iov_offset;
+		}
+	}
+
 	return 0;
 }
 
@@ -1120,28 +1158,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 	off_t io_end = kiocb->ki_pos + len;
 
 	if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
-		unsigned long max_pages;
+		unsigned long max_bytes;
 
 		/* Use 8x RA size as a decent limiter for both reads/writes */
-		max_pages = filp->f_ra.ra_pages;
-		if (!max_pages)
-			max_pages = VM_READAHEAD_PAGES;
-		max_pages *= 8;
-
-		/* If max pages are exceeded, reset the state */
-		len >>= PAGE_SHIFT;
-		if (async_list->io_pages + len <= max_pages) {
+		max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3);
+		if (!max_bytes)
+			max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3);
+
+		/* If max len is exceeded, reset the state */
+		if (async_list->io_len + len <= max_bytes) {
 			req->flags |= REQ_F_SEQ_PREV;
-			async_list->io_pages += len;
+			async_list->io_len += len;
 		} else {
 			io_end = 0;
-			async_list->io_pages = 0;
+			async_list->io_len = 0;
 		}
 	}
 
 	/* New file? Reset state. */
 	if (async_list->file != filp) {
-		async_list->io_pages = 0;
+		async_list->io_len = 0;
 		async_list->file = filp;
 	}
 	async_list->io_end = io_end;
@@ -1630,6 +1666,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	INIT_LIST_HEAD(&poll->wait.entry);
 	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
 
+	INIT_LIST_HEAD(&req->list);
+
 	mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
 
 	spin_lock_irq(&ctx->completion_lock);
@@ -1844,6 +1882,10 @@ restart:
 		/* async context always use a copy of the sqe */
 		kfree(sqe);
 
+		/* req from defer and link list needn't decrease async cnt */
+		if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
+			goto out;
+
 		if (!async_list)
 			break;
 		if (!list_empty(&req_list)) {
@@ -1891,6 +1933,7 @@ restart:
 		}
 	}
 
+out:
 	if (cur_mm) {
 		set_fs(old_fs);
 		unuse_mm(cur_mm);
@@ -1917,6 +1960,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
 	ret = true;
 	spin_lock(&list->lock);
 	list_add_tail(&req->list, &list->list);
+	/*
+	 * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
+	 */
+	smp_mb();
 	if (!atomic_read(&list->cnt)) {
 		list_del_init(&req->list);
 		ret = false;
diff --git a/fs/mount.h b/fs/mount.h
index 6250de544760..711a4093e475 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -58,7 +58,10 @@ struct mount {
 	struct mount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	struct mountpoint *mnt_mp;	/* where is it mounted */
-	struct hlist_node mnt_mp_list;	/* list mounts with the same mountpoint */
+	union {
+		struct hlist_node mnt_mp_list;	/* list mounts with the same mountpoint */
+		struct hlist_node mnt_umount;
+	};
 	struct list_head mnt_umounting; /* list entry for umount propagation */
 #ifdef CONFIG_FSNOTIFY
 	struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
@@ -68,8 +71,7 @@ struct mount {
 	int mnt_group_id;		/* peer group identifier */
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	struct hlist_head mnt_pins;
-	struct fs_pin mnt_umount;
-	struct dentry *mnt_ex_mountpoint;
+	struct hlist_head mnt_stuck_children;
 } __randomize_layout;
 
 #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
diff --git a/fs/namespace.c b/fs/namespace.c
index f0d664adb9ba..d28d30b13043 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,6 +70,8 @@ static struct hlist_head *mount_hashtable __read_mostly;
 static struct hlist_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
 static DECLARE_RWSEM(namespace_sem);
+static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
+static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
 
 /* /sys/fs */
 struct kobject *fs_kobj;
@@ -170,14 +172,6 @@ unsigned int mnt_get_count(struct mount *mnt)
 #endif
 }
 
-static void drop_mountpoint(struct fs_pin *p)
-{
-	struct mount *m = container_of(p, struct mount, mnt_umount);
-	dput(m->mnt_ex_mountpoint);
-	pin_remove(p);
-	mntput(&m->mnt);
-}
-
 static struct mount *alloc_vfsmnt(const char *name)
 {
 	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -215,7 +209,7 @@ static struct mount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_slave);
 		INIT_HLIST_NODE(&mnt->mnt_mp_list);
 		INIT_LIST_HEAD(&mnt->mnt_umounting);
-		init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
+		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
 	}
 	return mnt;
 
@@ -740,7 +734,7 @@ mountpoint:
 
 	/* Add the new mountpoint to the hash table */
 	read_seqlock_excl(&mount_lock);
-	new->m_dentry = dentry;
+	new->m_dentry = dget(dentry);
 	new->m_count = 1;
 	hlist_add_head(&new->m_hash, mp_hash(dentry));
 	INIT_HLIST_HEAD(&new->m_list);
@@ -753,7 +747,11 @@ done:
 	return mp;
 }
 
-static void put_mountpoint(struct mountpoint *mp)
+/*
+ * vfsmount lock must be held.  Additionally, the caller is responsible
+ * for serializing calls for given disposal list.
+ */
+static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
 {
 	if (!--mp->m_count) {
 		struct dentry *dentry = mp->m_dentry;
@@ -761,11 +759,18 @@ static void put_mountpoint(struct mountpoint *mp)
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags &= ~DCACHE_MOUNTED;
 		spin_unlock(&dentry->d_lock);
+		dput_to_list(dentry, list);
 		hlist_del(&mp->m_hash);
 		kfree(mp);
 	}
 }
 
+/* called with namespace_lock and vfsmount lock */
+static void put_mountpoint(struct mountpoint *mp)
+{
+	__put_mountpoint(mp, &ex_mountpoints);
+}
+
 static inline int check_mnt(struct mount *mnt)
 {
 	return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -796,25 +801,17 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
 /*
  * vfsmount lock must be held for write
  */
-static void unhash_mnt(struct mount *mnt)
+static struct mountpoint *unhash_mnt(struct mount *mnt)
 {
+	struct mountpoint *mp;
 	mnt->mnt_parent = mnt;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt_child);
 	hlist_del_init_rcu(&mnt->mnt_hash);
 	hlist_del_init(&mnt->mnt_mp_list);
-	put_mountpoint(mnt->mnt_mp);
+	mp = mnt->mnt_mp;
 	mnt->mnt_mp = NULL;
-}
-
-/*
- * vfsmount lock must be held for write
- */
-static void detach_mnt(struct mount *mnt, struct path *old_path)
-{
-	old_path->dentry = mnt->mnt_mountpoint;
-	old_path->mnt = &mnt->mnt_parent->mnt;
-	unhash_mnt(mnt);
+	return mp;
 }
 
 /*
@@ -822,9 +819,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
  */
 static void umount_mnt(struct mount *mnt)
 {
-	/* old mountpoint will be dropped when we can do that */
-	mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
-	unhash_mnt(mnt);
+	put_mountpoint(unhash_mnt(mnt));
 }
 
 /*
@@ -836,7 +831,7 @@ void mnt_set_mountpoint(struct mount *mnt,
 {
 	mp->m_count++;
 	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
-	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
+	child_mnt->mnt_mountpoint = mp->m_dentry;
 	child_mnt->mnt_parent = mnt;
 	child_mnt->mnt_mp = mp;
 	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
@@ -863,7 +858,6 @@ static void attach_mnt(struct mount *mnt,
 void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
 {
 	struct mountpoint *old_mp = mnt->mnt_mp;
-	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
 	struct mount *old_parent = mnt->mnt_parent;
 
 	list_del_init(&mnt->mnt_child);
@@ -873,22 +867,6 @@ void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct m
 	attach_mnt(mnt, parent, mp);
 
 	put_mountpoint(old_mp);
-
-	/*
-	 * Safely avoid even the suggestion this code might sleep or
-	 * lock the mount hash by taking advantage of the knowledge that
-	 * mnt_change_mountpoint will not release the final reference
-	 * to a mountpoint.
-	 *
-	 * During mounting, the mount passed in as the parent mount will
-	 * continue to use the old mountpoint and during unmounting, the
-	 * old mountpoint will continue to exist until namespace_unlock,
-	 * which happens well after mnt_change_mountpoint.
-	 */
-	spin_lock(&old_mountpoint->d_lock);
-	old_mountpoint->d_lockref.count--;
-	spin_unlock(&old_mountpoint->d_lock);
-
 	mnt_add_count(old_parent, -1);
 }
 
@@ -1103,19 +1081,22 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 
 static void cleanup_mnt(struct mount *mnt)
 {
+	struct hlist_node *p;
+	struct mount *m;
 	/*
-	 * This probably indicates that somebody messed
-	 * up a mnt_want/drop_write() pair.  If this
-	 * happens, the filesystem was probably unable
-	 * to make r/w->r/o transitions.
-	 */
-	/*
+	 * The warning here probably indicates that somebody messed
+	 * up a mnt_want/drop_write() pair.  If this happens, the
+	 * filesystem was probably unable to make r/w->r/o transitions.
 	 * The locking used to deal with mnt_count decrement provides barriers,
 	 * so mnt_get_writers() below is safe.
 	 */
 	WARN_ON(mnt_get_writers(mnt));
 	if (unlikely(mnt->mnt_pins.first))
 		mnt_pin_kill(mnt);
+	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
+		hlist_del(&m->mnt_umount);
+		mntput(&m->mnt);
+	}
 	fsnotify_vfsmount_delete(&mnt->mnt);
 	dput(mnt->mnt.mnt_root);
 	deactivate_super(mnt->mnt.mnt_sb);
@@ -1141,6 +1122,8 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 
 static void mntput_no_expire(struct mount *mnt)
 {
+	LIST_HEAD(list);
+
 	rcu_read_lock();
 	if (likely(READ_ONCE(mnt->mnt_ns))) {
 		/*
@@ -1181,10 +1164,12 @@ static void mntput_no_expire(struct mount *mnt)
 	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
 		struct mount *p, *tmp;
 		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts,  mnt_child) {
-			umount_mnt(p);
+			__put_mountpoint(unhash_mnt(p), &list);
+			hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
 		}
 	}
 	unlock_mount_hash();
+	shrink_dentry_list(&list);
 
 	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
 		struct task_struct *task = current;
@@ -1370,22 +1355,29 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
-
 static void namespace_unlock(void)
 {
 	struct hlist_head head;
+	struct hlist_node *p;
+	struct mount *m;
+	LIST_HEAD(list);
 
 	hlist_move_list(&unmounted, &head);
+	list_splice_init(&ex_mountpoints, &list);
 
 	up_write(&namespace_sem);
 
+	shrink_dentry_list(&list);
+
 	if (likely(hlist_empty(&head)))
 		return;
 
 	synchronize_rcu_expedited();
 
-	group_pin_kill(&head);
+	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
+		hlist_del(&m->mnt_umount);
+		mntput(&m->mnt);
+	}
 }
 
 static inline void namespace_lock(void)
@@ -1471,9 +1463,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 
 		disconnect = disconnect_mount(p, how);
-
-		pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
-				 disconnect ? &unmounted : NULL);
 		if (mnt_has_parent(p)) {
 			mnt_add_count(p->mnt_parent, -1);
 			if (!disconnect) {
@@ -1484,6 +1473,8 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 			}
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
+		if (disconnect)
+			hlist_add_head(&p->mnt_umount, &unmounted);
 	}
 }
 
@@ -1626,15 +1617,15 @@ void __detach_mounts(struct dentry *dentry)
 	namespace_lock();
 	lock_mount_hash();
 	mp = lookup_mountpoint(dentry);
-	if (IS_ERR_OR_NULL(mp))
+	if (!mp)
 		goto out_unlock;
 
 	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
 		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
-			hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
 			umount_mnt(mnt);
+			hlist_add_head(&mnt->mnt_umount, &unmounted);
 		}
 		else umount_tree(mnt, UMOUNT_CONNECTED);
 	}
@@ -2046,7 +2037,7 @@ int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
 static int attach_recursive_mnt(struct mount *source_mnt,
 			struct mount *dest_mnt,
 			struct mountpoint *dest_mp,
-			struct path *parent_path)
+			bool moving)
 {
 	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
 	HLIST_HEAD(tree_list);
@@ -2064,7 +2055,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 		return PTR_ERR(smp);
 
 	/* Is there space to add these mounts to the mount namespace? */
-	if (!parent_path) {
+	if (!moving) {
 		err = count_mounts(ns, source_mnt);
 		if (err)
 			goto out;
@@ -2083,8 +2074,8 @@ static int attach_recursive_mnt(struct mount *source_mnt,
 	} else {
 		lock_mount_hash();
 	}
-	if (parent_path) {
-		detach_mnt(source_mnt, parent_path);
+	if (moving) {
+		unhash_mnt(source_mnt);
 		attach_mnt(source_mnt, dest_mnt, dest_mp);
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
@@ -2182,7 +2173,7 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
 	      d_is_dir(mnt->mnt.mnt_root))
 		return -ENOTDIR;
 
-	return attach_recursive_mnt(mnt, p, mp, NULL);
+	return attach_recursive_mnt(mnt, p, mp, false);
 }
 
 /*
@@ -2575,11 +2566,11 @@ out:
 
 static int do_move_mount(struct path *old_path, struct path *new_path)
 {
-	struct path parent_path = {.mnt = NULL, .dentry = NULL};
 	struct mnt_namespace *ns;
 	struct mount *p;
 	struct mount *old;
-	struct mountpoint *mp;
+	struct mount *parent;
+	struct mountpoint *mp, *old_mp;
 	int err;
 	bool attached;
 
@@ -2589,7 +2580,9 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
 
 	old = real_mount(old_path->mnt);
 	p = real_mount(new_path->mnt);
+	parent = old->mnt_parent;
 	attached = mnt_has_parent(old);
+	old_mp = old->mnt_mp;
 	ns = old->mnt_ns;
 
 	err = -EINVAL;
@@ -2617,7 +2610,7 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
 	/*
 	 * Don't move a mount residing in a shared parent.
 	 */
-	if (attached && IS_MNT_SHARED(old->mnt_parent))
+	if (attached && IS_MNT_SHARED(parent))
 		goto out;
 	/*
 	 * Don't move a mount tree containing unbindable mounts to a destination
@@ -2633,18 +2626,21 @@ static int do_move_mount(struct path *old_path, struct path *new_path)
 			goto out;
 
 	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
-				   attached ? &parent_path : NULL);
+				   attached);
 	if (err)
 		goto out;
 
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
 	list_del_init(&old->mnt_expire);
+	if (attached)
+		put_mountpoint(old_mp);
 out:
 	unlock_mount(mp);
 	if (!err) {
-		path_put(&parent_path);
-		if (!attached)
+		if (attached)
+			mntput_no_expire(parent);
+		else
 			free_mnt_ns(ns);
 	}
 	return err;
@@ -3589,8 +3585,8 @@ EXPORT_SYMBOL(path_is_under);
 SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 		const char __user *, put_old)
 {
-	struct path new, old, parent_path, root_parent, root;
-	struct mount *new_mnt, *root_mnt, *old_mnt;
+	struct path new, old, root;
+	struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
 	struct mountpoint *old_mp, *root_mp;
 	int error;
 
@@ -3619,9 +3615,11 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	new_mnt = real_mount(new.mnt);
 	root_mnt = real_mount(root.mnt);
 	old_mnt = real_mount(old.mnt);
+	ex_parent = new_mnt->mnt_parent;
+	root_parent = root_mnt->mnt_parent;
 	if (IS_MNT_SHARED(old_mnt) ||
-		IS_MNT_SHARED(new_mnt->mnt_parent) ||
-		IS_MNT_SHARED(root_mnt->mnt_parent))
+		IS_MNT_SHARED(ex_parent) ||
+		IS_MNT_SHARED(root_parent))
 		goto out4;
 	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
 		goto out4;
@@ -3638,7 +3636,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 		goto out4; /* not a mountpoint */
 	if (!mnt_has_parent(root_mnt))
 		goto out4; /* not attached */
-	root_mp = root_mnt->mnt_mp;
 	if (new.mnt->mnt_root != new.dentry)
 		goto out4; /* not a mountpoint */
 	if (!mnt_has_parent(new_mnt))
@@ -3649,10 +3646,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* make certain new is below the root */
 	if (!is_path_reachable(new_mnt, new.dentry, &root))
 		goto out4;
-	root_mp->m_count++; /* pin it so it won't go away */
 	lock_mount_hash();
-	detach_mnt(new_mnt, &parent_path);
-	detach_mnt(root_mnt, &root_parent);
+	umount_mnt(new_mnt);
+	root_mp = unhash_mnt(root_mnt);  /* we'll need its mountpoint */
 	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
 		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
 		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
@@ -3660,7 +3656,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	/* mount old root on put_old */
 	attach_mnt(root_mnt, old_mnt, old_mp);
 	/* mount new_root on / */
-	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
+	attach_mnt(new_mnt, root_parent, root_mp);
+	mnt_add_count(root_parent, -1);
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	/* A moved mount should not expire automatically */
 	list_del_init(&new_mnt->mnt_expire);
@@ -3670,10 +3667,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	error = 0;
 out4:
 	unlock_mount(old_mp);
-	if (!error) {
-		path_put(&root_parent);
-		path_put(&parent_path);
-	}
+	if (!error)
+		mntput_no_expire(ex_parent);
 out3:
 	path_put(&root);
 out2:
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3683d2b1cc8e..628631e2e34f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -457,10 +457,8 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 		struct dentry *pd_dentry;
 
 		pd_dentry = dget_parent(dentry);
-		if (pd_dentry != NULL) {
-			nfs_zap_caches(d_inode(pd_dentry));
-			dput(pd_dentry);
-		}
+		nfs_zap_caches(d_inode(pd_dentry));
+		dput(pd_dentry);
 	}
 	nfs_free_fattr(res.fattr);
 	if (error < 0)
diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..a59abe3c669a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -374,6 +374,25 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
 				override_cred->cap_permitted;
 	}
 
+	/*
+	 * The new set of credentials can *only* be used in
+	 * task-synchronous circumstances, and does not need
+	 * RCU freeing, unless somebody then takes a separate
+	 * reference to it.
+	 *
+	 * NOTE! This is _only_ true because this credential
+	 * is used purely for override_creds() that installs
+	 * it as the subjective cred. Other threads will be
+	 * accessing ->real_cred, not the subjective cred.
+	 *
+	 * If somebody _does_ make a copy of this (using the
+	 * 'get_current_cred()' function), that will clear the
+	 * non_rcu field, because now that other user may be
+	 * expecting RCU freeing. But normal thread-synchronous
+	 * cred accesses will keep things non-RCU.
+	 */
+	override_cred->non_rcu = 1;
+
 	old_cred = override_creds(override_cred);
 retry:
 	res = user_path_at(dfd, filename, lookup_flags, &path);
diff --git a/include/Kbuild b/include/Kbuild
index 7e9f1acb9dd5..c38f0d46b267 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -31,7 +31,7 @@ header-test-			+= acpi/platform/acintel.h
 header-test-			+= acpi/platform/aclinux.h
 header-test-			+= acpi/platform/aclinuxex.h
 header-test-			+= acpi/processor.h
-header-test-			+= clocksource/hyperv_timer.h
+header-test-$(CONFIG_X86)	+= clocksource/hyperv_timer.h
 header-test-			+= clocksource/timer-sp804.h
 header-test-			+= crypto/cast_common.h
 header-test-			+= crypto/internal/cryptouser.h
@@ -246,6 +246,7 @@ header-test-			+= linux/intel-pti.h
 header-test-			+= linux/intel-svm.h
 header-test-			+= linux/interconnect-provider.h
 header-test-			+= linux/ioc3.h
+header-test-$(CONFIG_BLOCK)	+= linux/iomap.h
 header-test-			+= linux/ipack.h
 header-test-			+= linux/irq_cpustat.h
 header-test-			+= linux/irq_poll.h
@@ -454,9 +455,6 @@ header-test-			+= linux/phy/omap_control_phy.h
 header-test-			+= linux/phy/tegra/xusb.h
 header-test-			+= linux/phy/ulpi_phy.h
 header-test-			+= linux/phy_fixed.h
-header-test-			+= linux/pinctrl/pinconf-generic.h
-header-test-			+= linux/pinctrl/pinconf.h
-header-test-			+= linux/pinctrl/pinctrl.h
 header-test-			+= linux/pipe_fs_i.h
 header-test-			+= linux/pktcdvd.h
 header-test-			+= linux/pl320-ipc.h
@@ -905,10 +903,11 @@ header-test-			+= net/netfilter/nf_nat_redirect.h
 header-test-			+= net/netfilter/nf_queue.h
 header-test-			+= net/netfilter/nf_reject.h
 header-test-			+= net/netfilter/nf_synproxy.h
-header-test-			+= net/netfilter/nf_tables.h
-header-test-			+= net/netfilter/nf_tables_core.h
-header-test-			+= net/netfilter/nf_tables_ipv4.h
+header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables.h
+header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables_core.h
+header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables_ipv4.h
 header-test-			+= net/netfilter/nf_tables_ipv6.h
+header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables_offload.h
 header-test-			+= net/netfilter/nft_fib.h
 header-test-			+= net/netfilter/nft_meta.h
 header-test-			+= net/netfilter/nft_reject.h
@@ -949,7 +948,6 @@ header-test-			+= pcmcia/ds.h
 header-test-			+= rdma/ib.h
 header-test-			+= rdma/iw_portmap.h
 header-test-			+= rdma/opa_port_info.h
-header-test-			+= rdma/rdma_counter.h
 header-test-			+= rdma/rdmavt_cq.h
 header-test-			+= rdma/restrack.h
 header-test-			+= rdma/signature.h
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 689a58231288..12811091fd50 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -181,6 +181,7 @@ struct blkcg_policy {
 
 extern struct blkcg blkcg_root;
 extern struct cgroup_subsys_state * const blkcg_root_css;
+extern bool blkcg_debug_stats;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 				      struct request_queue *q, bool update_hint);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index feff3fe4467e..1b1fa1557e68 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,7 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,           /* Don't wait if request will block */
+	__REQ_NOWAIT_INLINE,	/* Return would-block error inline */
 	/*
 	 * When a shared kthread needs to issue a bio for a cgroup, doing
 	 * so synchronously can lead to priority inversions as the kthread
@@ -345,6 +346,7 @@ enum req_flag_bits {
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
+#define REQ_NOWAIT_INLINE	(1ULL << __REQ_NOWAIT_INLINE)
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
@@ -418,12 +420,13 @@ static inline int op_stat_group(unsigned int op)
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
+#define BLK_QC_T_EAGAIN		-2U
 #define BLK_QC_T_SHIFT		16
 #define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
-	return cookie != BLK_QC_T_NONE;
+	return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e8579412ad21..d7ee4c6bad48 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -170,3 +170,5 @@
 #else
 #define __diag_GCC_8(s)
 #endif
+
+#define __no_fgcse __attribute__((optimize("-fno-gcse")))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 8aaf7cd026b0..f0fd5636fddb 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -116,9 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	".pushsection .discard.unreachable\n\t"				\
 	".long 999b - .\n\t"						\
 	".popsection\n\t"
+
+/* Annotate a C jump table to allow objtool to follow the code flow */
+#define __annotate_jump_table __section(".rodata..c_jump_table")
+
 #else
 #define annotate_reachable()
 #define annotate_unreachable()
+#define __annotate_jump_table
 #endif
 
 #ifndef ASM_UNREACHABLE
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 095d55c3834d..599c27b56c29 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -189,6 +189,10 @@ struct ftrace_likely_data {
 #define asm_volatile_goto(x...) asm goto(x)
 #endif
 
+#ifndef __no_fgcse
+# define __no_fgcse
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 6b6c7396a584..cb732643471b 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -50,7 +50,6 @@ struct cn_dev {
 
 	u32 seq, groups;
 	struct sock *nls;
-	void (*input) (struct sk_buff *skb);
 
 	struct cn_queue_dev *cbdev;
 };
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 7eb43a038330..f7a30e0099be 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -145,7 +145,11 @@ struct cred {
 	struct user_struct *user;	/* real user ID subscription */
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
-	struct rcu_head	rcu;		/* RCU deletion hook */
+	/* RCU deletion */
+	union {
+		int non_rcu;			/* Can we skip RCU deletion? */
+		struct rcu_head	rcu;		/* RCU deletion hook */
+	};
 } __randomize_layout;
 
 extern void __put_cred(struct cred *);
@@ -246,6 +250,7 @@ static inline const struct cred *get_cred(const struct cred *cred)
 	if (!cred)
 		return cred;
 	validate_creds(cred);
+	nonconst_cred->non_rcu = 0;
 	return get_new_cred(nonconst_cred);
 }
 
@@ -257,6 +262,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
 	if (!atomic_inc_not_zero(&nonconst_cred->usage))
 		return NULL;
 	validate_creds(cred);
+	nonconst_cred->non_rcu = 0;
 	return cred;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index c330b75c6c57..6717adee33f0 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -915,6 +915,8 @@ struct dev_links_info {
  * 		This identifies the device type and carries type-specific
  * 		information.
  * @mutex:	Mutex to synchronize calls to its driver.
+ * @lockdep_mutex: An optional debug lock that a subsystem can use as a
+ * 		peer lock to gain localized lockdep coverage of the device_lock.
  * @bus:	Type of bus device is on.
  * @driver:	Which driver has allocated this
  * @platform_data: Platform data specific to the device.
@@ -998,6 +1000,9 @@ struct device {
 					   core doesn't touch it */
 	void		*driver_data;	/* Driver data, set and get with
 					   dev_set_drvdata/dev_get_drvdata */
+#ifdef CONFIG_PROVE_LOCKING
+	struct mutex		lockdep_mutex;
+#endif
 	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
 					 */
@@ -1383,6 +1388,7 @@ extern int (*platform_notify_remove)(struct device *dev);
  */
 extern struct device *get_device(struct device *dev);
 extern void put_device(struct device *dev);
+extern bool kill_device(struct device *dev);
 
 #ifdef CONFIG_DEVTMPFS
 extern int devtmpfs_create_node(struct device *dev);
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index b7338702592a..adf993a3bd58 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -32,6 +32,15 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 }
 #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
 
+#ifdef CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED
+bool force_dma_unencrypted(struct device *dev);
+#else
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+	return false;
+}
+#endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */
+
 /*
  * If memory encryption is supported, phys_to_dma will set the memory encryption
  * bit in the DMA address, and dma_to_phys will clear it.  The raw __phys_to_dma
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8d13e28a8e07..f7d1eea32c78 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -679,6 +679,20 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
 	return dma_set_mask_and_coherent(dev, mask);
 }
 
+/**
+ * dma_addressing_limited - return if the device is addressing limited
+ * @dev:	device to check
+ *
+ * Return %true if the device's DMA mask is too small to address all memory in
+ * the system, else %false.  Lack of addressing bits is the prime reason for
+ * bounce buffering, but might not be the only one.
+ */
+static inline bool dma_addressing_limited(struct device *dev)
+{
+	return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) <
+			    dma_get_required_mask(dev);
+}
+
 #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		const struct iommu_ops *iommu, bool coherent);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 17cd0078377c..1dd014c9c87b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -45,7 +45,6 @@ struct elevator_mq_ops {
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
 	void (*completed_request)(struct request *, u64);
-	void (*started_request)(struct request *);
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 7cab74d66f85..bdd09fd2520c 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -20,6 +20,5 @@ static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
 }
 
 void pin_remove(struct fs_pin *);
-void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
 void pin_insert(struct fs_pin *, struct vfsmount *);
 void pin_kill(struct fs_pin *);
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 781b96ac706f..a0637abffee8 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn);
+bool has_iova_flush_queue(struct iova_domain *iovad);
 int init_iova_flush_queue(struct iova_domain *iovad,
 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad,
 {
 }
 
+static inline bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+	return false;
+}
+
 static inline int init_iova_flush_queue(struct iova_domain *iovad,
 					iova_flush_cb flush_cb,
 					iova_entry_dtor entry_dtor)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c5da875f19e3..5c5b5867024c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,6 +318,7 @@ struct kvm_vcpu {
 	} spin_loop;
 #endif
 	bool preempted;
+	bool ready;
 	struct kvm_vcpu_arch arch;
 	struct dentry *debugfs_dentry;
 };
diff --git a/include/linux/msi.h b/include/linux/msi.h
index d48e919d55ae..8ad679e9d9c0 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -64,6 +64,10 @@ struct ti_sci_inta_msi_desc {
  * @msg:	The last set MSI message cached for reuse
  * @affinity:	Optional pointer to a cpu affinity mask for this descriptor
  *
+ * @write_msi_msg:	Callback that may be called when the MSI message
+ *			address or data changes
+ * @write_msi_msg_data:	Data parameter for the callback.
+ *
  * @masked:	[PCI MSI/X] Mask bits
  * @is_msix:	[PCI MSI/X] True if MSI-X
  * @multiple:	[PCI MSI/X] log2 num of messages allocated
@@ -90,6 +94,9 @@ struct msi_desc {
 	const void			*iommu_cookie;
 #endif
 
+	void (*write_msi_msg)(struct msi_desc *entry, void *data);
+	void *write_msi_msg_data;
+
 	union {
 		/* PCI MSI/X specific data */
 		struct {
@@ -100,6 +107,7 @@ struct msi_desc {
 				u8	multi_cap	: 3;
 				u8	maskbit		: 1;
 				u8	is_64		: 1;
+				u8	is_virtual	: 1;
 				u16	entry_nr;
 				unsigned default_irq;
 			} msi_attrib;
diff --git a/include/linux/netfilter/nf_conntrack_h323_asn1.h b/include/linux/netfilter/nf_conntrack_h323_asn1.h
index 91d6275292a5..19df78341fb3 100644
--- a/include/linux/netfilter/nf_conntrack_h323_asn1.h
+++ b/include/linux/netfilter/nf_conntrack_h323_asn1.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
- * ip_conntrack_h323_asn1.h - BER and PER decoding library for H.323
- * 			      conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
  *
  * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 56a92e3ae3ae..8c13538aeffe 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -58,9 +58,11 @@
 
 #include <linux/completion.h>
 #include <linux/device.h>
+#include <linux/interrupt.h>
 
 struct ntb_client;
 struct ntb_dev;
+struct ntb_msi;
 struct pci_dev;
 
 /**
@@ -205,7 +207,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops)
 }
 
 /**
- * struct ntb_ctx_ops - ntb device operations
+ * struct ntb_dev_ops - ntb device operations
  * @port_number:	See ntb_port_number().
  * @peer_port_count:	See ntb_peer_port_count().
  * @peer_port_number:	See ntb_peer_port_number().
@@ -404,7 +406,7 @@ struct ntb_client {
 #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv)
 
 /**
- * struct ntb_device - ntb device
+ * struct ntb_dev - ntb device
  * @dev:		Linux device object.
  * @pdev:		PCI device entry of the ntb.
  * @topo:		Detected topology of the ntb.
@@ -426,6 +428,10 @@ struct ntb_dev {
 	spinlock_t			ctx_lock;
 	/* block unregister until device is fully released */
 	struct completion		released;
+
+#ifdef CONFIG_NTB_MSI
+	struct ntb_msi *msi;
+#endif
 };
 #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
 
@@ -616,7 +622,6 @@ static inline int ntb_port_number(struct ntb_dev *ntb)
 
 	return ntb->ops->port_number(ntb);
 }
-
 /**
  * ntb_peer_port_count() - get the number of peer device ports
  * @ntb:	NTB device context.
@@ -654,6 +659,58 @@ static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx)
 }
 
 /**
+ * ntb_logical_port_number() - get the logical port number of the local port
+ * @ntb:	NTB device context.
+ *
+ * The Logical Port Number is defined to be a unique number for each
+ * port starting from zero through to the number of ports minus one.
+ * This is in contrast to the Port Number where each port can be assigned
+ * any unique physical number by the hardware.
+ *
+ * The logical port number is useful for calculating the resource indexes
+ * used by peers.
+ *
+ * Return: the logical port number or negative value indicating an error
+ */
+static inline int ntb_logical_port_number(struct ntb_dev *ntb)
+{
+	int lport = ntb_port_number(ntb);
+	int pidx;
+
+	if (lport < 0)
+		return lport;
+
+	for (pidx = 0; pidx < ntb_peer_port_count(ntb); pidx++)
+		if (lport <= ntb_peer_port_number(ntb, pidx))
+			return pidx;
+
+	return pidx;
+}
+
+/**
+ * ntb_peer_logical_port_number() - get the logical peer port by given index
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * The Logical Port Number is defined to be a unique number for each
+ * port starting from zero through to the number of ports minus one.
+ * This is in contrast to the Port Number where each port can be assigned
+ * any unique physical number by the hardware.
+ *
+ * The logical port number is useful for calculating the resource indexes
+ * used by peers.
+ *
+ * Return: the peer's logical port number or negative value indicating an error
+ */
+static inline int ntb_peer_logical_port_number(struct ntb_dev *ntb, int pidx)
+{
+	if (ntb_peer_port_number(ntb, pidx) < ntb_port_number(ntb))
+		return pidx;
+	else
+		return pidx + 1;
+}
+
+/**
  * ntb_peer_port_idx() - get the peer device port index by given port number
  * @ntb:	NTB device context.
  * @port:	Peer port number.
@@ -1506,4 +1563,141 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
 	return ntb->ops->peer_msg_write(ntb, pidx, midx, msg);
 }
 
+/**
+ * ntb_peer_resource_idx() - get a resource index for a given peer idx
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * When constructing a graph of peers, each remote peer must use a different
+ * resource index (mw, doorbell, etc) to communicate with each other
+ * peer.
+ *
+ * In a two peer system, this function should always return 0 such that
+ * resource 0 points to the remote peer on both ports.
+ *
+ * In a 5 peer system, this function will return the following matrix
+ *
+ * pidx \ port    0    1    2    3    4
+ * 0              0    0    1    2    3
+ * 1              0    1    1    2    3
+ * 2              0    1    2    2    3
+ * 3              0    1    2    3    3
+ *
+ * For example, if this function is used to program peer's memory
+ * windows, port 0 will program MW 0 on all its peers to point to itself.
+ * port 1 will program MW 0 in port 0 to point to itself and MW 1 on all
+ * other ports. etc.
+ *
+ * For the legacy two host case, ntb_port_number() and ntb_peer_port_number()
+ * both return zero and therefore this function will always return zero.
+ * So MW 0 on each host would be programmed to point to the other host.
+ *
+ * Return: the resource index to use for that peer.
+ */
+static inline int ntb_peer_resource_idx(struct ntb_dev *ntb, int pidx)
+{
+	int local_port, peer_port;
+
+	if (pidx >= ntb_peer_port_count(ntb))
+		return -EINVAL;
+
+	local_port = ntb_logical_port_number(ntb);
+	peer_port = ntb_peer_logical_port_number(ntb, pidx);
+
+	if (peer_port < local_port)
+		return local_port - 1;
+	else
+		return local_port;
+}
+
+/**
+ * ntb_peer_highest_mw_idx() - get a memory window index for a given peer idx
+ *	using the highest index memory windows first
+ *
+ * @ntb:	NTB device context.
+ * @pidx:	Peer port index.
+ *
+ * Like ntb_peer_resource_idx(), except it returns indexes starting with
+ * last memory window index.
+ *
+ * Return: the resource index to use for that peer.
+ */
+static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx)
+{
+	int ret;
+
+	ret = ntb_peer_resource_idx(ntb, pidx);
+	if (ret < 0)
+		return ret;
+
+	return ntb_mw_count(ntb, pidx) - ret - 1;
+}
+
+struct ntb_msi_desc {
+	u32 addr_offset;
+	u32 data;
+};
+
+#ifdef CONFIG_NTB_MSI
+
+int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx));
+int ntb_msi_setup_mws(struct ntb_dev *ntb);
+void ntb_msi_clear_mws(struct ntb_dev *ntb);
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+				  irq_handler_t thread_fn,
+				  const char *name, void *dev_id,
+				  struct ntb_msi_desc *msi_desc);
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id);
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+			 struct ntb_msi_desc *desc);
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+		      struct ntb_msi_desc *desc,
+		      phys_addr_t *msi_addr);
+
+#else /* not CONFIG_NTB_MSI */
+
+static inline int ntb_msi_init(struct ntb_dev *ntb,
+			       void (*desc_changed)(void *ctx))
+{
+	return -EOPNOTSUPP;
+}
+static inline int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+	return -EOPNOTSUPP;
+}
+static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {}
+static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb,
+						irq_handler_t handler,
+						irq_handler_t thread_fn,
+						const char *name, void *dev_id,
+						struct ntb_msi_desc *msi_desc)
+{
+	return -EOPNOTSUPP;
+}
+static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq,
+				     void *dev_id) {}
+static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+				       struct ntb_msi_desc *desc)
+{
+	return -EOPNOTSUPP;
+}
+static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+				    struct ntb_msi_desc *desc,
+				    phys_addr_t *msi_addr)
+{
+	return -EOPNOTSUPP;
+
+}
+
+#endif /* CONFIG_NTB_MSI */
+
+static inline int ntbm_msi_request_irq(struct ntb_dev *ntb,
+				       irq_handler_t handler,
+				       const char *name, void *dev_id,
+				       struct ntb_msi_desc *msi_desc)
+{
+	return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name,
+					     dev_id, msi_desc);
+}
+
 #endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2972793e3028..9e700d9f9f28 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1412,6 +1412,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 #define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
 #define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
 #define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
+
+/*
+ * Virtual interrupts allow for more interrupts to be allocated
+ * than the device has interrupts for. These are not programmed
+ * into the device's MSI-X table and must be handled by some
+ * other driver means.
+ */
+#define PCI_IRQ_VIRTUAL		(1 << 4)
+
 #define PCI_IRQ_ALL_TYPES \
 	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
 
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index b0fb1446fe04..6c8512d3be88 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -19,6 +19,7 @@ enum hk_flags {
 DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
 extern int housekeeping_any_cpu(enum hk_flags flags);
 extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
+extern bool housekeeping_enabled(enum hk_flags flags);
 extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
 extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
 extern void __init housekeeping_init(void);
@@ -35,6 +36,11 @@ static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
 	return cpu_possible_mask;
 }
 
+static inline bool housekeeping_enabled(enum hk_flags flags)
+{
+	return false;
+}
+
 static inline void housekeeping_affine(struct task_struct *t,
 				       enum hk_flags flags) { }
 static inline void housekeeping_init(void) { }
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..30c515520fb2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -127,6 +127,19 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head)
 }
 
 /**
+ * wq_has_single_sleeper - check if there is only one sleeper
+ * @wq_head: wait queue head
+ *
+ * Returns true if wq_head has only one sleeper on the list.
+ *
+ * Please refer to the comment for waitqueue_active.
+ */
+static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head)
+{
+	return list_is_singular(&wq_head->head);
+}
+
+/**
  * wq_has_sleeper - check if there are any waiting processes
  * @wq_head: wait queue head
  *
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 88c27153a4bc..45850a8391d9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4170,7 +4170,7 @@ struct sta_opmode_info {
 	u8 rx_nss;
 };
 
-#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)ERR_PTR(-ENODATA))
+#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)(long)(-ENODATA))
 
 /**
  * struct wiphy_vendor_command - vendor command definition
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index db337299e81e..b16d21636d69 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -2,8 +2,8 @@
 #define _NET_FLOW_OFFLOAD_H
 
 #include <linux/kernel.h>
+#include <linux/list.h>
 #include <net/flow_dissector.h>
-#include <net/sch_generic.h>
 
 struct flow_match {
 	struct flow_dissector	*dissector;
@@ -249,6 +249,10 @@ enum flow_block_binder_type {
 	FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
 };
 
+struct flow_block {
+	struct list_head cb_list;
+};
+
 struct netlink_ext_ack;
 
 struct flow_block_offload {
@@ -256,29 +260,33 @@ struct flow_block_offload {
 	enum flow_block_binder_type binder_type;
 	bool block_shared;
 	struct net *net;
+	struct flow_block *block;
 	struct list_head cb_list;
 	struct list_head *driver_block_list;
 	struct netlink_ext_ack *extack;
 };
 
+enum tc_setup_type;
+typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data,
+			    void *cb_priv);
+
 struct flow_block_cb {
 	struct list_head	driver_list;
 	struct list_head	list;
-	struct net		*net;
-	tc_setup_cb_t		*cb;
+	flow_setup_cb_t		*cb;
 	void			*cb_ident;
 	void			*cb_priv;
 	void			(*release)(void *cb_priv);
 	unsigned int		refcnt;
 };
 
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
 					  void *cb_ident, void *cb_priv,
 					  void (*release)(void *cb_priv));
 void flow_block_cb_free(struct flow_block_cb *block_cb);
 
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *offload,
-					   tc_setup_cb_t *cb, void *cb_ident);
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+					   flow_setup_cb_t *cb, void *cb_ident);
 
 void *flow_block_cb_priv(struct flow_block_cb *block_cb);
 void flow_block_cb_incref(struct flow_block_cb *block_cb);
@@ -296,11 +304,12 @@ static inline void flow_block_cb_remove(struct flow_block_cb *block_cb,
 	list_move(&block_cb->list, &offload->cb_list);
 }
 
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
 			   struct list_head *driver_block_list);
 
 int flow_block_cb_setup_simple(struct flow_block_offload *f,
-			       struct list_head *driver_list, tc_setup_cb_t *cb,
+			       struct list_head *driver_list,
+			       flow_setup_cb_t *cb,
 			       void *cb_ident, void *cb_priv, bool ingress_only);
 
 enum flow_cls_command {
@@ -333,4 +342,9 @@ flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
 	return flow_cmd->rule;
 }
 
+static inline void flow_block_init(struct flow_block *flow_block)
+{
+	INIT_LIST_HEAD(&flow_block->cb_list);
+}
+
 #endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 93ce6b0daaba..573429be4d59 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -76,6 +76,11 @@ struct nf_conntrack_expect_policy {
 #define NF_CT_EXPECT_CLASS_DEFAULT	0
 #define NF_CT_EXPECT_MAX_CNT		255
 
+/* Allow to reuse expectations with the same tuples from different master
+ * conntracks.
+ */
+#define NF_CT_EXP_F_SKIP_MASTER	0x1
+
 int nf_conntrack_expect_pernet_init(struct net *net);
 void nf_conntrack_expect_pernet_fini(struct net *net);
 
@@ -122,10 +127,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t,
 		       u_int8_t, const __be16 *, const __be16 *);
 void nf_ct_expect_put(struct nf_conntrack_expect *exp);
 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
-				u32 portid, int report);
-static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+				u32 portid, int report, unsigned int flags);
+static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect,
+				       unsigned int flags)
 {
-	return nf_ct_expect_related_report(expect, 0, 0);
+	return nf_ct_expect_related_report(expect, 0, 0, flags);
 }
 
 #endif /*_NF_CONNTRACK_EXPECT_H*/
diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 8f00125b06f4..44513b93bd55 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -68,6 +68,7 @@ struct synproxy_options {
 	u8				options;
 	u8				wscale;
 	u16				mss;
+	u16				mss_encode;
 	u32				tsval;
 	u32				tsecr;
 };
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 35dfdd9f69b3..9b624566b82d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -11,6 +11,7 @@
 #include <linux/rhashtable.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netlink.h>
+#include <net/flow_offload.h>
 
 struct module;
 
@@ -951,7 +952,7 @@ struct nft_stats {
  *	@stats: per-cpu chain stats
  *	@chain: the chain
  *	@dev_name: device name that this base chain is attached to (if any)
- *	@cb_list: list of flow block callbacks (for hardware offload)
+ *	@flow_block: flow block (for hardware offload)
  */
 struct nft_base_chain {
 	struct nf_hook_ops		ops;
@@ -961,7 +962,7 @@ struct nft_base_chain {
 	struct nft_stats __percpu	*stats;
 	struct nft_chain		chain;
 	char 				dev_name[IFNAMSIZ];
-	struct list_head		cb_list;
+	struct flow_block		flow_block;
 };
 
 static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 841faadceb6e..e429809ca90d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -6,7 +6,6 @@
 #include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
-#include <net/flow_offload.h>
 #include <net/net_namespace.h>
 
 /* TC action not accessible from user space */
@@ -126,14 +125,14 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 }
 
 static inline
-int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb,
+int tc_setup_cb_block_register(struct tcf_block *block, flow_setup_cb_t *cb,
 			       void *cb_priv)
 {
 	return 0;
 }
 
 static inline
-void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb,
+void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
 				  void *cb_priv)
 {
 }
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 855167bbc372..6b6b01234dd9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -15,6 +15,7 @@
 #include <linux/mutex.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
+#include <net/flow_offload.h>
 
 struct Qdisc_ops;
 struct qdisc_walker;
@@ -22,9 +23,6 @@ struct tcf_walker;
 struct module;
 struct bpf_flow_keys;
 
-typedef int tc_setup_cb_t(enum tc_setup_type type,
-			  void *type_data, void *cb_priv);
-
 typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
 				    enum tc_setup_type type, void *type_data);
 
@@ -313,7 +311,7 @@ struct tcf_proto_ops {
 	void			(*walk)(struct tcf_proto *tp,
 					struct tcf_walker *arg, bool rtnl_held);
 	int			(*reoffload)(struct tcf_proto *tp, bool add,
-					     tc_setup_cb_t *cb, void *cb_priv,
+					     flow_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
 	void *			(*tmplt_create)(struct net *net,
@@ -401,7 +399,7 @@ struct tcf_block {
 	refcount_t refcnt;
 	struct net *net;
 	struct Qdisc *q;
-	struct list_head cb_list;
+	struct flow_block flow_block;
 	struct list_head owner_list;
 	bool keep_dst;
 	unsigned int offloadcnt; /* Number of oddloaded filters */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f42d300f0cfa..e5cf514ba118 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1709,6 +1709,11 @@ static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
 	return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
+static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
+{
+	return skb_rb_last(&sk->tcp_rtx_queue);
+}
+
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
 {
 	return skb_peek(&sk->sk_write_queue);
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index c50fb297e265..dc14b52577f7 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -31,7 +31,7 @@
  * FIP tunable parameters.
  */
 #define FCOE_CTLR_START_DELAY	2000	/* mS after first adv. to choose FCF */
-#define FCOE_CTRL_SOL_TOV	2000	/* min. solicitation interval (mS) */
+#define FCOE_CTLR_SOL_TOV	2000	/* min. solicitation interval (mS) */
 #define FCOE_CTLR_FCF_LIMIT	20	/* max. number of FCF entries */
 #define FCOE_CTLR_VN2VN_LOGIN_LIMIT 3	/* max. VN2VN rport login retries */
 
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index a5fcdad4a03e..cc139dbd71e5 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -369,6 +369,8 @@ struct scsi_host_template {
 	 */
 	unsigned long dma_boundary;
 
+	unsigned long virt_boundary_mask;
+
 	/*
 	 * This specifies "machine infinity" for host templates which don't
 	 * limit the transfer size.  Note this limit represents an absolute
@@ -587,6 +589,7 @@ struct Scsi_Host {
 	unsigned int max_sectors;
 	unsigned int max_segment_size;
 	unsigned long dma_boundary;
+	unsigned long virt_boundary_mask;
 	/*
 	 * In scsi-mq mode, the number of hardware queues supported by the LLD.
 	 *
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index 3f9d6b6a5691..c1036d16ed03 100644
--- a/include/soc/fsl/qe/qe.h
+++ b/include/soc/fsl/qe/qe.h
@@ -259,7 +259,7 @@ static inline int qe_alive_during_sleep(void)
 
 /* Structure that defines QE firmware binary files.
  *
- * See Documentation/powerpc/qe_firmware.txt for a description of these
+ * See Documentation/powerpc/qe_firmware.rst for a description of these
  * fields.
  */
 struct qe_firmware {
diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h
index c5188ff724d1..bc88d6f964da 100644
--- a/include/sound/compress_driver.h
+++ b/include/sound/compress_driver.h
@@ -173,10 +173,7 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream)
 	if (snd_BUG_ON(!stream))
 		return;
 
-	if (stream->direction == SND_COMPRESS_PLAYBACK)
-		stream->runtime->state = SNDRV_PCM_STATE_SETUP;
-	else
-		stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+	stream->runtime->state = SNDRV_PCM_STATE_SETUP;
 
 	wake_up(&stream->runtime->sleep);
 }
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7c19540ce21..5e3f12d5359e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
-	 * For ARM: See Documentation/virtual/kvm/api.txt
+	 * For ARM: See Documentation/virt/kvm/api.txt
 	 */
 	union {
 		__u32 irq;
@@ -1086,7 +1086,7 @@ struct kvm_xen_hvm_config {
  *
  * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
  * the irqfd to operate in resampling mode for level triggered interrupt
- * emulation.  See Documentation/virtual/kvm/api.txt.
+ * emulation.  See Documentation/virt/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 75758ec26c8b..beb9a9d0c00a 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2863,7 +2863,7 @@ enum nl80211_attrs {
 #define NL80211_HT_CAPABILITY_LEN		26
 #define NL80211_VHT_CAPABILITY_LEN		12
 #define NL80211_HE_MIN_CAPABILITY_LEN           16
-#define NL80211_HE_MAX_CAPABILITY_LEN           51
+#define NL80211_HE_MAX_CAPABILITY_LEN           54
 #define NL80211_MAX_NR_CIPHER_SUITES		5
 #define NL80211_MAX_NR_AKM_SUITES		2
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9d9705ceda76..2427bc4d8eba 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -518,7 +518,13 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_RGBX444 v4l2_fourcc('R', 'X', '1', '2') /* 16  rrrrgggg bbbbxxxx */
 #define V4L2_PIX_FMT_ABGR444 v4l2_fourcc('A', 'B', '1', '2') /* 16  aaaabbbb ggggrrrr */
 #define V4L2_PIX_FMT_XBGR444 v4l2_fourcc('X', 'B', '1', '2') /* 16  xxxxbbbb ggggrrrr */
-#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('B', 'A', '1', '2') /* 16  bbbbgggg rrrraaaa */
+
+/*
+ * Originally this had 'BA12' as fourcc, but this clashed with the older
+ * V4L2_PIX_FMT_SGRBG12 which inexplicably used that same fourcc.
+ * So use 'GA12' instead for V4L2_PIX_FMT_BGRA444.
+ */
+#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('G', 'A', '1', '2') /* 16  bbbbgggg rrrraaaa */
 #define V4L2_PIX_FMT_BGRX444 v4l2_fourcc('B', 'X', '1', '2') /* 16  bbbbgggg rrrrxxxx */
 #define V4L2_PIX_FMT_RGB555  v4l2_fourcc('R', 'G', 'B', 'O') /* 16  RGB-5-5-5     */
 #define V4L2_PIX_FMT_ARGB555 v4l2_fourcc('A', 'R', '1', '5') /* 16  ARGB-1-5-5-5  */
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index dc0b682ec2d9..deff97217496 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -38,7 +38,7 @@ config PREEMPT_VOLUNTARY
 config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT_COUNT
+	select PREEMPTION
 	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
 	help
 	  This option reduces the latency of the kernel by making
@@ -55,7 +55,28 @@ config PREEMPT
 	  embedded system with latency requirements in the milliseconds
 	  range.
 
+config PREEMPT_RT
+	bool "Fully Preemptible Kernel (Real-Time)"
+	depends on EXPERT && ARCH_SUPPORTS_RT
+	select PREEMPTION
+	help
+	  This option turns the kernel into a real-time kernel by replacing
+	  various locking primitives (spinlocks, rwlocks, etc.) with
+	  preemptible priority-inheritance aware variants, enforcing
+	  interrupt threading and introducing mechanisms to break up long
+	  non-preemptible sections. This makes the kernel, except for very
+	  low level and critical code paths (entry code, scheduler, low
+	  level interrupt handling) fully preemptible and brings most
+	  execution contexts under scheduler control.
+
+	  Select this if you are building a kernel for systems which
+	  require real-time guarantees.
+
 endchoice
 
 config PREEMPT_COUNT
        bool
+
+config PREEMPTION
+       bool
+       select PREEMPT_COUNT
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 16079550db6d..8191a7db2777 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1295,11 +1295,11 @@ bool bpf_opcode_in_insntable(u8 code)
  *
  * Decode and execute eBPF instructions.
  */
-static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
+static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
 {
 #define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
 #define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
-	static const void *jumptable[256] = {
+	static const void * const jumptable[256] __annotate_jump_table = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
 		BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
@@ -1558,7 +1558,6 @@ out:
 		BUG_ON(1);
 		return 0;
 }
-STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
 
 #define PROG_NAME(stack_size) __bpf_prog_run##stack_size
 #define DEFINE_BPF_PROG_RUN(stack_size) \
diff --git a/kernel/cred.c b/kernel/cred.c
index f9a0ce66c9c3..c0a4c12d38b2 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -144,7 +144,10 @@ void __put_cred(struct cred *cred)
 	BUG_ON(cred == current->cred);
 	BUG_ON(cred == current->real_cred);
 
-	call_rcu(&cred->rcu, put_cred_rcu);
+	if (cred->non_rcu)
+		put_cred_rcu(&cred->rcu);
+	else
+		call_rcu(&cred->rcu, put_cred_rcu);
 }
 EXPORT_SYMBOL(__put_cred);
 
@@ -261,6 +264,7 @@ struct cred *prepare_creds(void)
 	old = task->cred;
 	memcpy(new, old, sizeof(struct cred));
 
+	new->non_rcu = 0;
 	atomic_set(&new->usage, 1);
 	set_cred_subscribers(new, 0);
 	get_group_info(new->group_info);
@@ -544,7 +548,19 @@ const struct cred *override_creds(const struct cred *new)
 
 	validate_creds(old);
 	validate_creds(new);
-	get_cred(new);
+
+	/*
+	 * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
+	 *
+	 * That means that we do not clear the 'non_rcu' flag, since
+	 * we are only installing the cred into the thread-synchronous
+	 * '->cred' pointer, not the '->real_cred' pointer that is
+	 * visible to other threads under RCU.
+	 *
+	 * Also note that we did validate_creds() manually, not depending
+	 * on the validation in 'get_cred()'.
+	 */
+	get_new_cred((struct cred *)new);
 	alter_cred_subscribers(new, 1);
 	rcu_assign_pointer(current->cred, new);
 	alter_cred_subscribers(old, -1);
@@ -681,6 +697,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	validate_creds(old);
 
 	*new = *old;
+	new->non_rcu = 0;
 	atomic_set(&new->usage, 1);
 	set_cred_subscribers(new, 0);
 	get_uid(new->user);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 70f8f8d9200e..9decbba255fc 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -48,6 +48,9 @@ config ARCH_HAS_DMA_COHERENT_TO_PFN
 config ARCH_HAS_DMA_MMAP_PGPROT
 	bool
 
+config ARCH_HAS_FORCE_DMA_UNENCRYPTED
+	bool
+
 config DMA_NONCOHERENT_CACHE_SYNC
 	bool
 
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index b90e1aede743..59bdceea3737 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -23,14 +23,6 @@
 #define ARCH_ZONE_DMA_BITS 24
 #endif
 
-/*
- * For AMD SEV all DMA must be to unencrypted addresses.
- */
-static inline bool force_dma_unencrypted(void)
-{
-	return sev_active();
-}
-
 static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
 {
 	if (!dev->dma_mask) {
@@ -46,7 +38,7 @@ static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
 static inline dma_addr_t phys_to_dma_direct(struct device *dev,
 		phys_addr_t phys)
 {
-	if (force_dma_unencrypted())
+	if (force_dma_unencrypted(dev))
 		return __phys_to_dma(dev, phys);
 	return phys_to_dma(dev, phys);
 }
@@ -67,7 +59,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
 	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
 		dma_mask = dev->bus_dma_mask;
 
-	if (force_dma_unencrypted())
+	if (force_dma_unencrypted(dev))
 		*phys_mask = __dma_to_phys(dev, dma_mask);
 	else
 		*phys_mask = dma_to_phys(dev, dma_mask);
@@ -159,7 +151,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
 	}
 
 	ret = page_address(page);
-	if (force_dma_unencrypted()) {
+	if (force_dma_unencrypted(dev)) {
 		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
 		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
 	} else {
@@ -192,7 +184,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
 		return;
 	}
 
-	if (force_dma_unencrypted())
+	if (force_dma_unencrypted(dev))
 		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
 
 	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
@@ -242,12 +234,14 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
-		if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
-			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
+		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
+
+		if (unlikely(is_swiotlb_buffer(paddr)))
+			swiotlb_tbl_sync_single(dev, paddr, sg->length,
 					dir, SYNC_FOR_DEVICE);
 
 		if (!dev_is_dma_coherent(dev))
-			arch_sync_dma_for_device(dev, sg_phys(sg), sg->length,
+			arch_sync_dma_for_device(dev, paddr, sg->length,
 					dir);
 	}
 }
@@ -279,11 +273,13 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 	int i;
 
 	for_each_sg(sgl, sg, nents, i) {
+		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
+
 		if (!dev_is_dma_coherent(dev))
-			arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
-	
-		if (unlikely(is_swiotlb_buffer(sg_phys(sg))))
-			swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length, dir,
+			arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);
+
+		if (unlikely(is_swiotlb_buffer(paddr)))
+			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
 					SYNC_FOR_CPU);
 	}
 
@@ -407,11 +403,9 @@ int dma_direct_supported(struct device *dev, u64 mask)
 
 size_t dma_direct_max_mapping_size(struct device *dev)
 {
-	size_t size = SIZE_MAX;
-
 	/* If SWIOTLB is active, use its maximum mapping size */
-	if (is_swiotlb_active())
-		size = swiotlb_max_mapping_size(dev);
-
-	return size;
+	if (is_swiotlb_active() &&
+	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
+		return swiotlb_max_mapping_size(dev);
+	return SIZE_MAX;
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index a75b6a7f458a..4436158a6d30 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -720,6 +720,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
 
+	tsk->exit_state = EXIT_ZOMBIE;
 	if (unlikely(tsk->ptrace)) {
 		int sig = thread_group_leader(tsk) &&
 				thread_group_empty(tsk) &&
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 123ea07a3f3b..ccb28085b114 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -14,6 +14,12 @@ EXPORT_SYMBOL_GPL(housekeeping_overridden);
 static cpumask_var_t housekeeping_mask;
 static unsigned int housekeeping_flags;
 
+bool housekeeping_enabled(enum hk_flags flags)
+{
+	return !!(housekeeping_flags & flags);
+}
+EXPORT_SYMBOL_GPL(housekeeping_enabled);
+
 int housekeeping_any_cpu(enum hk_flags flags)
 {
 	if (static_branch_unlikely(&housekeeping_overridden))
diff --git a/kernel/smp.c b/kernel/smp.c
index 616d4d114847..7dbcb402c2fc 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -291,6 +291,14 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
 		     && !oops_in_progress);
 
+	/*
+	 * When @wait we can deadlock when we interrupt between llist_add() and
+	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+	 * csd_lock() because the interrupt context uses the same csd
+	 * storage.
+	 */
+	WARN_ON_ONCE(!in_task());
+
 	csd = &csd_stack;
 	if (!wait) {
 		csd = this_cpu_ptr(&csd_data);
@@ -416,6 +424,14 @@ void smp_call_function_many(const struct cpumask *mask,
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
 		     && !oops_in_progress && !early_boot_irqs_disabled);
 
+	/*
+	 * When @wait we can deadlock when we interrupt between llist_add() and
+	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
+	 * csd_lock() because the interrupt context uses the same csd
+	 * storage.
+	 */
+	WARN_ON_ONCE(!in_task());
+
 	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	if (cpu == this_cpu)
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index e6a02b274b73..f5440abb7532 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -226,12 +226,17 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
 		.store	= store,
 		.size	= size,
 	};
+	mm_segment_t fs;
 
 	/* Trace user stack if not a kernel thread */
 	if (current->flags & PF_KTHREAD)
 		return 0;
 
+	fs = get_fs();
+	set_fs(USER_DS);
 	arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));
+	set_fs(fs);
+
 	return c.len;
 }
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index bc6673ab3a08..5960e2980a8a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -353,23 +353,13 @@ config DEBUG_SECTION_MISMATCH
 	  which results in the code/data being placed in specific sections.
 	  The section mismatch analysis is always performed after a full
 	  kernel build, and enabling this option causes the following
-	  additional steps to occur:
+	  additional step to occur:
 	  - Add the option -fno-inline-functions-called-once to gcc commands.
 	    When inlining a function annotated with __init in a non-init
 	    function, we would lose the section information and thus
 	    the analysis would not catch the illegal reference.
 	    This option tells gcc to inline less (but it does result in
 	    a larger kernel).
-	  - Run the section mismatch analysis for each module/built-in.a file.
-	    When we run the section mismatch analysis on vmlinux.o, we
-	    lose valuable information about where the mismatch was
-	    introduced.
-	    Running the analysis for each module/built-in.a file
-	    tells where the mismatch happens much closer to the
-	    source. The drawback is that the same mismatch is
-	    reported at least twice.
-	  - Enable verbose reporting from modpost in order to help resolve
-	    the section mismatches that are reported.
 
 config SECTION_MISMATCH_WARN_ONLY
 	bool "Make section mismatch errors non-fatal"
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 154fa558bb90..5040fe43f4b4 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -6,7 +6,7 @@
 menuconfig NF_TABLES_BRIDGE
 	depends on BRIDGE && NETFILTER && NF_TABLES
 	select NETFILTER_FAMILY_BRIDGE
-	bool "Ethernet Bridge nf_tables support"
+	tristate "Ethernet Bridge nf_tables support"
 
 if NF_TABLES_BRIDGE
 
@@ -25,6 +25,8 @@ config NF_LOG_BRIDGE
 	tristate "Bridge packet logging"
 	select NF_LOG_COMMON
 
+endif # NF_TABLES_BRIDGE
+
 config NF_CONNTRACK_BRIDGE
 	tristate "IPv4/IPV6 bridge connection tracking support"
 	depends on NF_CONNTRACK
@@ -39,8 +41,6 @@ config NF_CONNTRACK_BRIDGE
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-endif # NF_TABLES_BRIDGE
-
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
 	depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 76f8db3841d7..d63b970784dc 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule,
 }
 EXPORT_SYMBOL(flow_rule_match_enc_opts);
 
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
 					  void *cb_ident, void *cb_priv,
 					  void (*release)(void *cb_priv))
 {
@@ -175,7 +175,6 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
 	if (!block_cb)
 		return ERR_PTR(-ENOMEM);
 
-	block_cb->net = net;
 	block_cb->cb = cb;
 	block_cb->cb_ident = cb_ident;
 	block_cb->cb_priv = cb_priv;
@@ -194,14 +193,13 @@ void flow_block_cb_free(struct flow_block_cb *block_cb)
 }
 EXPORT_SYMBOL(flow_block_cb_free);
 
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
-					   tc_setup_cb_t *cb, void *cb_ident)
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+					   flow_setup_cb_t *cb, void *cb_ident)
 {
 	struct flow_block_cb *block_cb;
 
-	list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
-		if (block_cb->net == f->net &&
-		    block_cb->cb == cb &&
+	list_for_each_entry(block_cb, &block->cb_list, list) {
+		if (block_cb->cb == cb &&
 		    block_cb->cb_ident == cb_ident)
 			return block_cb;
 	}
@@ -228,7 +226,7 @@ unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb)
 }
 EXPORT_SYMBOL(flow_block_cb_decref);
 
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
 			   struct list_head *driver_block_list)
 {
 	struct flow_block_cb *block_cb;
@@ -245,7 +243,8 @@ EXPORT_SYMBOL(flow_block_cb_is_busy);
 
 int flow_block_cb_setup_simple(struct flow_block_offload *f,
 			       struct list_head *driver_block_list,
-			       tc_setup_cb_t *cb, void *cb_ident, void *cb_priv,
+			       flow_setup_cb_t *cb,
+			       void *cb_ident, void *cb_priv,
 			       bool ingress_only)
 {
 	struct flow_block_cb *block_cb;
@@ -261,8 +260,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 		if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, cb_ident,
-					       cb_priv, NULL);
+		block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
 
@@ -270,7 +268,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 		list_add_tail(&block_cb->driver_list, driver_block_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, cb, cb_ident);
+		block_cb = flow_block_cb_lookup(f->block, cb, cb_ident);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 614c38ece104..33f41178afcc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -951,7 +951,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 				    struct flow_block_offload *f)
 {
 	struct flow_block_cb *block_cb;
-	tc_setup_cb_t *cb;
+	flow_setup_cb_t *cb;
 
 	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		cb = dsa_slave_setup_tc_block_cb_ig;
@@ -967,7 +967,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 		if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, dev, dev, NULL);
+		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
 
@@ -975,7 +975,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 		list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, cb, dev);
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4d6bf7ac0792..6bdb1ab8af61 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -416,8 +416,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	     ctinfo == IP_CT_RELATED_REPLY))
 		return XT_CONTINUE;
 
-	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
-	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+	/* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
+	 * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
 	 * on, which all have an ID field [relevant for hashing]. */
 
 	hash = clusterip_hashfn(skb, cipinfo->config);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 8e7f84ec783d..0e70f3f65f6f 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
+		opts.mss_encode = opts.mss;
+		opts.mss = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 59031670b16a..cc23f1ce239c 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -78,6 +78,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
 	flow.flowi4_tos = RT_TOS(iph->tos);
 	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+	flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
 
 	return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
 }
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 87b711fd5a44..3e2685c120c7 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -221,11 +221,11 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		ret = nf_ct_expect_related(rtp_exp);
+		ret = nf_ct_expect_related(rtp_exp, 0);
 		if (ret == 0) {
 			rtcp_exp->tuple.dst.u.udp.port =
 			    htons(nated_port + 1);
-			ret = nf_ct_expect_related(rtcp_exp);
+			ret = nf_ct_expect_related(rtcp_exp, 0);
 			if (ret == 0)
 				break;
 			else if (ret == -EBUSY) {
@@ -296,7 +296,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -352,7 +352,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -444,7 +444,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -537,7 +537,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4af1f5dae9d3..6e4afc48d7bb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1288,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
 	int nsize, old_factor;
+	long limit;
 	int nlen;
 	u8 flags;
 
@@ -1298,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
-		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+	 * We need some allowance to not penalize applications setting small
+	 * SO_SNDBUF values.
+	 * Also allow first and last skb in retransmit queue to be split.
+	 */
+	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+	if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+		     skb != tcp_rtx_queue_head(sk) &&
+		     skb != tcp_rtx_queue_tail(sk))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
 		return -ENOMEM;
 	}
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index e77ea1ed5edd..5cdb4a69d277 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
+		opts.mss_encode = opts.mss;
+		opts.mss = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 6bcaf7357183..d800801a5dd2 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -55,7 +55,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
 	if (rpfilter_addr_linklocal(&iph->saddr)) {
 		lookup_flags |= RT6_LOOKUP_F_IFACE;
 		fl6.flowi6_oif = dev->ifindex;
-	} else if ((flags & XT_RPFILTER_LOOSE) == 0)
+	/* Set flowi6_oif for vrf devices to look up route in l3mdev domain. */
+	} else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
+		  (flags & XT_RPFILTER_LOOSE) == 0)
 		fl6.flowi6_oif = dev->ifindex;
 
 	rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
@@ -70,7 +72,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
 		goto out;
 	}
 
-	if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+	if (rt->rt6i_idev->dev == dev ||
+	    l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+	    (flags & XT_RPFILTER_LOOSE))
 		ret = true;
  out:
 	ip6_rt_put(rt);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 76cc9e967fa6..4d458067d80d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -936,8 +936,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
 
 	err = ieee80211_set_probe_resp(sdata, params->probe_resp,
 				       params->probe_resp_len, csa);
-	if (err < 0)
+	if (err < 0) {
+		kfree(new);
 		return err;
+	}
 	if (err == 0)
 		changed |= BSS_CHANGED_AP_PROBE_RESP;
 
@@ -949,8 +951,10 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
 							 params->civicloc,
 							 params->civicloc_len);
 
-		if (err < 0)
+		if (err < 0) {
+			kfree(new);
 			return err;
+		}
 
 		changed |= BSS_CHANGED_FTM_RESPONDER;
 	}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index acd4afb4944b..c9a8a2433e8a 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -187,11 +187,16 @@ int drv_conf_tx(struct ieee80211_local *local,
 	if (!check_sdata_in_driver(sdata))
 		return -EIO;
 
-	if (WARN_ONCE(params->cw_min == 0 ||
-		      params->cw_min > params->cw_max,
-		      "%s: invalid CW_min/CW_max: %d/%d\n",
-		      sdata->name, params->cw_min, params->cw_max))
+	if (params->cw_min == 0 || params->cw_min > params->cw_max) {
+		/*
+		 * If we can't configure hardware anyway, don't warn. We may
+		 * never have initialized the CW parameters.
+		 */
+		WARN_ONCE(local->ops->conf_tx,
+			  "%s: invalid CW_min/CW_max: %d/%d\n",
+			  sdata->name, params->cw_min, params->cw_max);
 		return -EINVAL;
+	}
 
 	trace_drv_conf_tx(local, sdata, ac, params);
 	if (local->ops->conf_tx)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 32a45c03786e..0d65f4d39494 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -223,8 +223,6 @@ config NF_CONNTRACK_FTP
 	  of Network Address Translation on them.
 
 	  This is FTP support on Layer 3 independent connection tracking.
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -338,7 +336,7 @@ config NF_CONNTRACK_SIP
 	help
 	  SIP is an application-layer control protocol that can establish,
 	  modify, and terminate multimedia sessions (conferences) such as
-	  Internet telephony calls. With the ip_conntrack_sip and
+	  Internet telephony calls. With the nf_conntrack_sip and
 	  the nf_nat_sip modules you can support the protocol on a connection
 	  tracking/NATing firewall.
 
@@ -1313,7 +1311,7 @@ config NETFILTER_XT_MATCH_HELPER
 	depends on NETFILTER_ADVANCED
 	help
 	  Helper matching allows you to match packets in dynamic connections
-	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
+	  tracked by a conntrack-helper, e.g. nf_conntrack_ftp
 
 	  To compile it as a module, choose M here.  If unsure, say Y.
 
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 403541996952..08adcb222986 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -231,7 +231,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
 
 	IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
 		      __func__, ct, ARG_TUPLE(&exp->tuple));
-	nf_ct_expect_related(exp);
+	nf_ct_expect_related(exp, 0);
 	nf_ct_expect_put(exp);
 }
 EXPORT_SYMBOL(ip_vs_nfct_expect_related);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 42ee659d0d1e..d011d2eb0848 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -159,7 +159,7 @@ static int amanda_help(struct sk_buff *skb,
 		if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_amanda(skb, ctinfo, protoff,
 					    off - dataoff, len, exp);
-		else if (nf_ct_expect_related(exp) != 0) {
+		else if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 921a7b95be68..1ba6becc3079 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -68,7 +68,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
 	exp->helper               = NULL;
 
-	nf_ct_expect_related(exp);
+	nf_ct_expect_related(exp, 0);
 	nf_ct_expect_put(exp);
 
 	nf_ct_refresh(ct, skb, timeout * HZ);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bdfeacee0817..a542761e90d1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1817,9 +1817,7 @@ EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
 #include <linux/netfilter/nfnetlink_conntrack.h>
 #include <linux/mutex.h>
 
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
+/* Generic function for tcp/udp/sctp/dccp and alike. */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple)
 {
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ffd1f4906c4f..65364de915d1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -249,13 +249,22 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 static inline int expect_matches(const struct nf_conntrack_expect *a,
 				 const struct nf_conntrack_expect *b)
 {
-	return a->master == b->master &&
-	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+	return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
 	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
 	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
+static bool master_matches(const struct nf_conntrack_expect *a,
+			   const struct nf_conntrack_expect *b,
+			   unsigned int flags)
+{
+	if (flags & NF_CT_EXP_F_SKIP_MASTER)
+		return true;
+
+	return a->master == b->master;
+}
+
 /* Generally a bad idea to call this: could have matched already. */
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 {
@@ -399,7 +408,8 @@ static void evict_oldest_expect(struct nf_conn *master,
 		nf_ct_remove_expect(last);
 }
 
-static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+				       unsigned int flags)
 {
 	const struct nf_conntrack_expect_policy *p;
 	struct nf_conntrack_expect *i;
@@ -417,8 +427,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	}
 	h = nf_ct_expect_dst_hash(net, &expect->tuple);
 	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
-		if (expect_matches(i, expect)) {
-			if (i->class != expect->class)
+		if (master_matches(i, expect, flags) &&
+		    expect_matches(i, expect)) {
+			if (i->class != expect->class ||
+			    i->master != expect->master)
 				return -EALREADY;
 
 			if (nf_ct_remove_expect(i))
@@ -453,12 +465,12 @@ out:
 }
 
 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
-				u32 portid, int report)
+				u32 portid, int report, unsigned int flags)
 {
 	int ret;
 
 	spin_lock_bh(&nf_conntrack_expect_lock);
-	ret = __nf_ct_expect_check(expect);
+	ret = __nf_ct_expect_check(expect, flags);
 	if (ret < 0)
 		goto out;
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 8c6c11bab5b6..0ecb3e289ef2 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -525,7 +525,7 @@ skip_nl_seq:
 				 protoff, matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
-		if (nf_ct_expect_related(exp) != 0) {
+		if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		} else
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 8f6ba8162f0b..573cb4481481 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,11 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
- * 			      	     conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
  *
  * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
- * See ip_conntrack_helper_h323_asn1.h for details.
+ * See nf_conntrack_helper_h323_asn1.h for details.
  */
 
 #ifdef __KERNEL__
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6497e5fc0871..8ba037b76ad3 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -305,8 +305,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(rtp_exp) == 0) {
-			if (nf_ct_expect_related(rtcp_exp) == 0) {
+		if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+			if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
 				pr_debug("nf_ct_h323: expect RTP ");
 				nf_ct_dump_tuple(&rtp_exp->tuple);
 				pr_debug("nf_ct_h323: expect RTCP ");
@@ -364,7 +364,7 @@ static int expect_t120(struct sk_buff *skb,
 		ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_h323: expect T.120 ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -701,7 +701,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_q931: expect H.245 ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -825,7 +825,7 @@ static int expect_callforwarding(struct sk_buff *skb,
 					 protoff, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_q931: expect Call Forwarding ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -1284,7 +1284,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nat_q931(skb, ct, ctinfo, protoff, data,
 			       taddr, i, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_ras: expect Q.931 ");
 			nf_ct_dump_tuple(&exp->tuple);
 
@@ -1349,7 +1349,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 			  IPPROTO_UDP, NULL, &port);
 	exp->helper = nf_conntrack_helper_ras;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect RAS ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
@@ -1561,7 +1561,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect Q.931 ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
@@ -1615,7 +1615,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect Q.931 ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 7ac156f1f3bc..e40988a2f22f 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -213,7 +213,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
-			else if (nf_ct_expect_related(exp) != 0) {
+			else if (nf_ct_expect_related(exp, 0) != 0) {
 				nf_ct_helper_log(skb, ct,
 						 "cannot add expectation");
 				ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1b77444d5b52..6aa01eb6fe99 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2616,7 +2616,7 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
 	if (IS_ERR(exp))
 		return PTR_ERR(exp);
 
-	err = nf_ct_expect_related_report(exp, portid, report);
+	err = nf_ct_expect_related_report(exp, portid, report, 0);
 	nf_ct_expect_put(exp);
 	return err;
 }
@@ -3367,7 +3367,7 @@ ctnetlink_create_expect(struct net *net,
 		goto err_rcu;
 	}
 
-	err = nf_ct_expect_related_report(exp, portid, report);
+	err = nf_ct_expect_related_report(exp, portid, report, 0);
 	nf_ct_expect_put(exp);
 err_rcu:
 	rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b22042ad0fca..a971183f11af 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -234,9 +234,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
 	nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
 	if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
 		nf_nat_pptp_exp_gre(exp_orig, exp_reply);
-	if (nf_ct_expect_related(exp_orig) != 0)
+	if (nf_ct_expect_related(exp_orig, 0) != 0)
 		goto out_put_both;
-	if (nf_ct_expect_related(exp_reply) != 0)
+	if (nf_ct_expect_related(exp_reply, 0) != 0)
 		goto out_unexpect_orig;
 
 	/* Add GRE keymap entries */
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c2eb365f1723..5b05487a60d2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
  * Connection tracking protocol helper module for GRE.
  *
  * GRE is a generic encapsulation protocol, which is generally not very
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index dd53e2b20f6b..097deba7441a 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -215,7 +215,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
 		return -NF_ACCEPT;
 	}
 
-	/* See ip_conntrack_proto_tcp.c */
+	/* See nf_conntrack_proto_tcp.c */
 	if (state->net->ct.sysctl_checksum &&
 	    state->hook == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index d5fdfa00d683..85c1f8c213b0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -472,6 +472,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
+	u16 win_raw;
 	s32 receiver_offset;
 	bool res, in_recv_win;
 
@@ -480,7 +481,8 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	 */
 	seq = ntohl(tcph->seq);
 	ack = sack = ntohl(tcph->ack_seq);
-	win = ntohs(tcph->window);
+	win_raw = ntohs(tcph->window);
+	win = win_raw;
 	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
 
 	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
@@ -655,14 +657,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			    && state->last_seq == seq
 			    && state->last_ack == ack
 			    && state->last_end == end
-			    && state->last_win == win)
+			    && state->last_win == win_raw)
 				state->retrans++;
 			else {
 				state->last_dir = dir;
 				state->last_seq = seq;
 				state->last_ack = ack;
 				state->last_end = end;
-				state->last_win = win;
+				state->last_win = win_raw;
 				state->retrans = 0;
 			}
 		}
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 81448c3db661..1aebd6569d4e 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -153,7 +153,7 @@ static int help(struct sk_buff *skb,
 	nf_ct_dump_tuple(&exp->tuple);
 
 	/* Can't expect this?  Best to drop packet now. */
-	if (nf_ct_expect_related(exp) != 0) {
+	if (nf_ct_expect_related(exp, 0) != 0) {
 		nf_ct_helper_log(skb, ct, "cannot add expectation");
 		ret = NF_DROP;
 	}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 107251731809..b83dc9bf0a5d 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -977,11 +977,15 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 		/* -EALREADY handling works around end-points that send
 		 * SDP messages with identical port but different media type,
 		 * we pretend expectation was set up.
+		 * It also works in the case that SDP messages are sent with
+		 * identical expect tuples but for different master conntracks.
 		 */
-		int errp = nf_ct_expect_related(rtp_exp);
+		int errp = nf_ct_expect_related(rtp_exp,
+						NF_CT_EXP_F_SKIP_MASTER);
 
 		if (errp == 0 || errp == -EALREADY) {
-			int errcp = nf_ct_expect_related(rtcp_exp);
+			int errcp = nf_ct_expect_related(rtcp_exp,
+						NF_CT_EXP_F_SKIP_MASTER);
 
 			if (errcp == 0 || errcp == -EALREADY)
 				ret = NF_ACCEPT;
@@ -1296,7 +1300,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 		ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
 				    exp, matchoff, matchlen);
 	else {
-		if (nf_ct_expect_related(exp) != 0) {
+		if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		} else
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index df6d6d61bd58..80ee53f29f68 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -78,7 +78,7 @@ static int tftp_help(struct sk_buff *skb,
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_tftp(skb, ctinfo, exp);
-		else if (nf_ct_expect_related(exp) != 0) {
+		else if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		}
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index a352604d6186..3bc7e0854efe 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -48,7 +48,7 @@ static unsigned int help(struct sk_buff *skb,
 		int res;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		res = nf_ct_expect_related(exp);
+		res = nf_ct_expect_related(exp, 0);
 		if (res == 0)
 			break;
 		else if (res != -EBUSY) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 9ab410455992..3f6023ed4966 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -519,7 +519,7 @@ another_round:
  * and NF_INET_LOCAL_OUT, we change the destination to map into the
  * range. It might not be possible to get a unique tuple, but we try.
  * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
+ * __nf_conntrack_confirm and drop the packet. */
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index d48484a9d52d..aace6768a64e 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -91,7 +91,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index dfb7ef8845bd..c691ab8d234c 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff *skb,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index e338d91980d8..f0a735e86851 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -414,7 +414,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 		int ret;
 
 		exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -607,7 +607,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
 		int ret;
 
 		rtp_exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(rtp_exp);
+		ret = nf_ct_expect_related(rtp_exp,
+					   NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == -EBUSY)
 			continue;
 		else if (ret < 0) {
@@ -615,7 +616,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
 			break;
 		}
 		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
-		ret = nf_ct_expect_related(rtcp_exp);
+		ret = nf_ct_expect_related(rtcp_exp,
+					   NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == 0)
 			break;
 		else if (ret == -EBUSY) {
diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
index 833a11f68031..1a591132d6eb 100644
--- a/net/netfilter/nf_nat_tftp.c
+++ b/net/netfilter/nf_nat_tftp.c
@@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff *skb,
 		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = nf_nat_follow_master;
-	if (nf_ct_expect_related(exp) != 0) {
+	if (nf_ct_expect_related(exp, 0) != 0) {
 		nf_ct_helper_log(skb, exp->master, "cannot add expectation");
 		return NF_DROP;
 	}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b101f187eda8..c769462a839e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -470,7 +470,7 @@ synproxy_send_client_synack(struct net *net,
 	struct iphdr *iph, *niph;
 	struct tcphdr *nth;
 	unsigned int tcp_hdr_size;
-	u16 mss = opts->mss;
+	u16 mss = opts->mss_encode;
 
 	iph = ip_hdr(skb);
 
@@ -687,7 +687,7 @@ ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
 	state = &ct->proto.tcp;
 	switch (state->state) {
 	case TCP_CONNTRACK_CLOSE:
-		if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
 			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
 						      ntohl(th->seq) + 1);
 			break;
@@ -884,7 +884,7 @@ synproxy_send_client_synack_ipv6(struct net *net,
 	struct ipv6hdr *iph, *niph;
 	struct tcphdr *nth;
 	unsigned int tcp_hdr_size;
-	u16 mss = opts->mss;
+	u16 mss = opts->mss_encode;
 
 	iph = ipv6_hdr(skb);
 
@@ -1111,7 +1111,7 @@ ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
 	state = &ct->proto.tcp;
 	switch (state->state) {
 	case TCP_CONNTRACK_CLOSE:
-		if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+		if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
 			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
 						      ntohl(th->seq) + 1);
 			break;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ed17a7c29b86..605a7cfe7ca7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1662,7 +1662,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 		chain->flags |= NFT_BASE_CHAIN | flags;
 		basechain->policy = NF_ACCEPT;
-		INIT_LIST_HEAD(&basechain->cb_list);
+		flow_block_init(&basechain->flow_block);
 	} else {
 		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 		if (chain == NULL)
@@ -1900,6 +1900,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	if (nla[NFTA_CHAIN_FLAGS])
 		flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+	else if (chain)
+		flags = chain->flags;
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
 
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 2c3302845f67..64f5fd5f240e 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -116,7 +116,7 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
 	struct flow_block_cb *block_cb;
 	int err;
 
-	list_for_each_entry(block_cb, &basechain->cb_list, list) {
+	list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
 		err = block_cb->cb(type, type_data, block_cb->cb_priv);
 		if (err < 0)
 			return err;
@@ -154,7 +154,7 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
 static int nft_flow_offload_bind(struct flow_block_offload *bo,
 				 struct nft_base_chain *basechain)
 {
-	list_splice(&bo->cb_list, &basechain->cb_list);
+	list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
 	return 0;
 }
 
@@ -198,6 +198,7 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
 		return -EOPNOTSUPP;
 
 	bo.command = cmd;
+	bo.block = &basechain->flow_block;
 	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 	bo.extack = &extack;
 	INIT_LIST_HEAD(&bo.cb_list);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 92077d459109..4abbb452cf6c 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -578,7 +578,7 @@ static int nfnetlink_bind(struct net *net, int group)
 	ss = nfnetlink_get_subsys(type << 8);
 	rcu_read_unlock();
 	if (!ss)
-		request_module("nfnetlink-subsys-%d", type);
+		request_module_nowait("nfnetlink-subsys-%d", type);
 	return 0;
 }
 #endif
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 3fd540b2c6ba..b5d5d071d765 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -193,7 +193,7 @@ static inline void nft_chain_filter_inet_init(void) {}
 static inline void nft_chain_filter_inet_fini(void) {}
 #endif /* CONFIG_NF_TABLES_IPV6 */
 
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
 static unsigned int
 nft_do_chain_bridge(void *priv,
 		    struct sk_buff *skb,
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index 2f89bde3c61c..ff9ac8ae0031 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -142,3 +142,6 @@ MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
 #ifdef CONFIG_NF_TABLES_IPV6
 MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
 #endif
+#ifdef CONFIG_NF_TABLES_INET
+MODULE_ALIAS_NFT_CHAIN(1, "nat");	/* NFPROTO_INET */
+#endif
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 827ab6196df9..46ca8bcca1bd 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1252,7 +1252,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
 		          priv->l4proto, NULL, &priv->dport);
 	exp->timeout.expires = jiffies + priv->timeout * HZ;
 
-	if (nf_ct_expect_related(exp) != 0)
+	if (nf_ct_expect_related(exp, 0) != 0)
 		regs->verdict.code = NF_DROP;
 }
 
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index fe93e731dc7f..b836d550b919 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -129,7 +129,7 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
 	priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
 
 	priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
-	if (priv->modulus <= 1)
+	if (priv->modulus < 1)
 		return -ERANGE;
 
 	if (priv->offset + priv->modulus - 1 < priv->offset)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 76866f77e343..f1b1d948c07b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -546,7 +546,7 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
 	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
 		return ERR_PTR(-EINVAL);
 
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
 	if (ctx->family == NFPROTO_BRIDGE)
 		return ERR_PTR(-EAGAIN);
 #endif
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 8487eeff5c0e..43eeb1f609f1 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -291,4 +291,4 @@ module_exit(nft_redir_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_ALIAS_NFT_EXPR("redir");
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 80060ade8a5b..928e661d1517 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -31,6 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
 		opts->options |= NF_SYNPROXY_OPT_ECN;
 
 	opts->options &= priv->info.options;
+	opts->mss_encode = opts->mss;
+	opts->mss = info->mss;
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
 		synproxy_init_timestamp_cookie(info, opts);
 	else
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dca3b1e2acf0..bc89e16e0505 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -59,7 +59,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
 void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			   const struct sk_buff *skb)
 {
-	struct flow_stats *stats;
+	struct sw_flow_stats *stats;
 	unsigned int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
@@ -87,7 +87,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 			if (likely(flow->stats_last_writer != -1) &&
 			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
 				/* Try to allocate CPU-specific stats. */
-				struct flow_stats *new_stats;
+				struct sw_flow_stats *new_stats;
 
 				new_stats =
 					kmem_cache_alloc_node(flow_stats_cache,
@@ -134,7 +134,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
 
 	/* We open code this to make sure cpu 0 is always considered */
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
-		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
+		struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
 			/* Local CPU may write on non-local stats, so we must
@@ -158,7 +158,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
 
 	/* We open code this to make sure cpu 0 is always considered */
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
-		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+		struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
 			spin_lock_bh(&stats->lock);
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 3e2cc2202d66..a5506e2d4b7a 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -194,7 +194,7 @@ struct sw_flow_actions {
 	struct nlattr actions[];
 };
 
-struct flow_stats {
+struct sw_flow_stats {
 	u64 packet_count;		/* Number of packets matched. */
 	u64 byte_count;			/* Number of bytes matched. */
 	unsigned long used;		/* Last used time (in jiffies). */
@@ -216,7 +216,7 @@ struct sw_flow {
 	struct cpumask cpu_used_mask;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
-	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
+	struct sw_flow_stats __rcu *stats[]; /* One for each CPU.  First one
 					   * is allocated at flow creation time,
 					   * the rest are allocated on demand
 					   * while holding the 'stats[0].lock'.
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 988fd8a94e43..cf3582c5ed70 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -66,7 +66,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
-	struct flow_stats *stats;
+	struct sw_flow_stats *stats;
 
 	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
 	if (!flow)
@@ -110,7 +110,7 @@ static void flow_free(struct sw_flow *flow)
 	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
 		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
-					(struct flow_stats __force *)flow->stats[cpu]);
+					(struct sw_flow_stats __force *)flow->stats[cpu]);
 	kmem_cache_free(flow_cache, flow);
 }
 
@@ -712,13 +712,13 @@ int ovs_flow_init(void)
 
 	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
 				       + (nr_cpu_ids
-					  * sizeof(struct flow_stats *)),
+					  * sizeof(struct sw_flow_stats *)),
 				       0, 0, NULL);
 	if (flow_cache == NULL)
 		return -ENOMEM;
 
 	flow_stats_cache
-		= kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+		= kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
 				    0, SLAB_HWCACHE_ALIGN, NULL);
 	if (flow_stats_cache == NULL) {
 		kmem_cache_destroy(flow_cache);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d144233423c5..efd3cfb80a2a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -691,6 +691,8 @@ static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
 	if (!indr_dev->block)
 		return;
 
+	bo.block = &indr_dev->block->flow_block;
+
 	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 			  &bo);
 	tcf_block_setup(indr_dev->block, &bo);
@@ -775,6 +777,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 		.command	= command,
 		.binder_type	= ei->binder_type,
 		.net		= dev_net(dev),
+		.block		= &block->flow_block,
 		.block_shared	= tcf_block_shared(block),
 		.extack		= extack,
 	};
@@ -810,6 +813,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
 	bo.net = dev_net(dev);
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
+	bo.block = &block->flow_block;
 	bo.block_shared = tcf_block_shared(block);
 	bo.extack = extack;
 	INIT_LIST_HEAD(&bo.cb_list);
@@ -987,8 +991,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 		return ERR_PTR(-ENOMEM);
 	}
 	mutex_init(&block->lock);
+	flow_block_init(&block->flow_block);
 	INIT_LIST_HEAD(&block->chain_list);
-	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
 	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
 
@@ -1514,7 +1518,7 @@ void tcf_block_put(struct tcf_block *block)
 EXPORT_SYMBOL(tcf_block_put);
 
 static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 			    void *cb_priv, bool add, bool offload_in_use,
 			    struct netlink_ext_ack *extack)
 {
@@ -1570,7 +1574,7 @@ static int tcf_block_bind(struct tcf_block *block,
 
 		i++;
 	}
-	list_splice(&bo->cb_list, &block->cb_list);
+	list_splice(&bo->cb_list, &block->flow_block.cb_list);
 
 	return 0;
 
@@ -2152,7 +2156,9 @@ replay:
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_NEWTFILTER, false, rtnl_held);
 		tfilter_put(tp, fh);
-		q->flags &= ~TCQ_F_CAN_BYPASS;
+		/* q pointer is NULL for shared blocks */
+		if (q)
+			q->flags &= ~TCQ_F_CAN_BYPASS;
 	}
 
 errout:
@@ -3156,7 +3162,7 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 	if (block->nooffloaddevcnt && err_stop)
 		return -EOPNOTSUPP;
 
-	list_for_each_entry(block_cb, &block->cb_list, list) {
+	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
 		err = block_cb->cb(type, type_data, block_cb->cb_priv);
 		if (err) {
 			if (err_stop)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 691f71830134..3f7a9c02b70c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -651,7 +651,7 @@ skip:
 	}
 }
 
-static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			     void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 38d6e85693fc..054123742e32 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1800,7 +1800,7 @@ fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
 	return NULL;
 }
 
-static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tcf_block *block = tp->chain->block;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a30d2f8feb32..455ea2793f9b 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -282,7 +282,7 @@ skip:
 	arg->count++;
 }
 
-static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index be9e46c77e8b..8614088edd1b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1152,7 +1152,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 }
 
 static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
-			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       bool add, flow_setup_cb_t *cb, void *cb_priv,
 			       struct netlink_ext_ack *extack)
 {
 	struct tc_cls_u32_offload cls_u32 = {};
@@ -1172,7 +1172,7 @@ static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 }
 
 static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
-			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       bool add, flow_setup_cb_t *cb, void *cb_priv,
 			       struct netlink_ext_ack *extack)
 {
 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -1213,7 +1213,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	return 0;
 }
 
-static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			 void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index f345662890a6..ca8ac96d22a9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -476,7 +476,7 @@ static void tipc_topsrv_accept(struct work_struct *work)
 	}
 }
 
-/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
  * The queued job is launched into tipc_topsrv_accept()
  */
 static void tipc_topsrv_listener_data_ready(struct sock *sk)
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 73e80b917f12..77c742fa4fb1 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -125,11 +125,6 @@ CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
 cc-option = $(call __cc-option, $(CC),\
 	$(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS),$(1),$(2))
 
-# hostcc-option
-# Usage: cflags-y += $(call hostcc-option,-march=winchip-c6,-march=i586)
-hostcc-option = $(call __cc-option, $(HOSTCC),\
-	$(KBUILD_HOSTCFLAGS) $(HOST_EXTRACFLAGS),$(1),$(2))
-
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index be38198d98b2..0d434d0afc0b 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -63,14 +63,14 @@ ifneq ($(strip $(real-obj-y) $(need-builtin)),)
 builtin-target := $(obj)/built-in.a
 endif
 
-ifdef CONFIG_MODULES
+ifeq ($(CONFIG_MODULES)$(need-modorder),y1)
 modorder-target := $(obj)/modules.order
 endif
 
-# We keep a list of all modules in $(MODVERDIR)
+mod-targets := $(patsubst %.o, %.mod, $(obj-m))
 
 __build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \
-	 $(if $(KBUILD_MODULES),$(obj-m) $(modorder-target)) \
+	 $(if $(KBUILD_MODULES),$(obj-m) $(mod-targets) $(modorder-target)) \
 	 $(subdir-ym) $(always)
 	@:
 
@@ -87,11 +87,6 @@ ifneq ($(KBUILD_ENABLE_EXTRA_GCC_CHECKS),)
   cmd_checkdoc = $(srctree)/scripts/kernel-doc -none $<
 endif
 
-# Do section mismatch analysis for each module/built-in.a
-ifdef CONFIG_DEBUG_SECTION_MISMATCH
-  cmd_secanalysis = ; scripts/mod/modpost $@
-endif
-
 # Compile C sources (.c)
 # ---------------------------------------------------------------------------
 
@@ -268,7 +263,7 @@ endef
 
 # List module undefined symbols (or empty line if not enabled)
 ifdef CONFIG_TRIM_UNUSED_KSYMS
-cmd_undef_syms = $(NM) $@ | sed -n 's/^  *U //p' | xargs echo
+cmd_undef_syms = $(NM) $< | sed -n 's/^  *U //p' | xargs echo
 else
 cmd_undef_syms = echo
 endif
@@ -278,13 +273,15 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
 	$(call cmd,force_checksrc)
 	$(call if_changed_rule,cc_o_c)
 
-# Single-part modules are special since we need to mark them in $(MODVERDIR)
+cmd_mod = { \
+	echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \
+	$(cmd_undef_syms); \
+	} > $@
 
-$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
-	$(call cmd,force_checksrc)
-	$(call if_changed_rule,cc_o_c)
-	@{ echo $(@:.o=.ko); echo $@; \
-	   $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
+$(obj)/%.mod: $(obj)/%.o FORCE
+	$(call if_changed,mod)
+
+targets += $(mod-targets)
 
 quiet_cmd_cc_lst_c = MKLST   $@
       cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
@@ -294,7 +291,7 @@ quiet_cmd_cc_lst_c = MKLST   $@
 $(obj)/%.lst: $(src)/%.c FORCE
 	$(call if_changed_dep,cc_lst_c)
 
-# header test (header-test-y target)
+# header test (header-test-y, header-test-m target)
 # ---------------------------------------------------------------------------
 
 quiet_cmd_cc_s_h = CC      $@
@@ -423,13 +420,10 @@ endif # builtin-target
 #
 # Create commands to either record .ko file or cat modules.order from
 # a subdirectory
-modorder-cmds =						\
-	$(foreach m, $(modorder),			\
-		$(if $(filter %/modules.order, $m),	\
-			cat $m;, echo kernel/$m;))
-
 $(modorder-target): $(subdir-ym) FORCE
-	$(Q)(cat /dev/null; $(modorder-cmds)) > $@
+	$(Q){ $(foreach m, $(modorder), \
+	$(if $(filter %/modules.order, $m), cat $m, echo $m);) :; } \
+	| $(AWK) '!x[$$0]++' - > $@
 
 #
 # Rule to compile a set of .o files into one .a file (with symbol table)
@@ -464,12 +458,10 @@ endif
 # module is turned into a multi object module, $^ will contain header file
 # dependencies recorded in the .*.cmd file.
 quiet_cmd_link_multi-m = LD [M]  $@
-cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) $(cmd_secanalysis)
+      cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
 
 $(multi-used-m): FORCE
 	$(call if_changed,link_multi-m)
-	@{ echo $(@:.o=.ko); echo $(filter %.o,$^); \
-	   $(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
 $(call multi_depend, $(multi-used-m), .o, -objs -y -m)
 
 targets += $(multi-used-m)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 6cb3aa5cbc79..5241d0751eb0 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -78,7 +78,7 @@ header-test-y	+= $(filter-out $(header-test-), \
 		$(wildcard $(addprefix $(srctree)/$(src)/, \
 		$(header-test-pattern-y)))))
 
-extra-$(CONFIG_HEADER_TEST) += $(addsuffix .s, $(header-test-y))
+extra-$(CONFIG_HEADER_TEST) += $(addsuffix .s, $(header-test-y) $(header-test-m))
 
 # Add subdir path
 
diff --git a/scripts/Makefile.modbuiltin b/scripts/Makefile.modbuiltin
index 50a9990760f3..7d4711b88656 100644
--- a/scripts/Makefile.modbuiltin
+++ b/scripts/Makefile.modbuiltin
@@ -40,7 +40,7 @@ __modbuiltin: $(modbuiltin-target) $(subdir-ym)
 	@:
 
 $(modbuiltin-target): $(subdir-ym) FORCE
-	$(Q)(for m in $(modbuiltin-mods); do echo kernel/$$m; done;	\
+	$(Q)(for m in $(modbuiltin-mods); do echo $$m; done;	\
 	cat /dev/null $(modbuiltin-subdirs)) > $@
 
 PHONY += FORCE
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 0dae402661f3..5a4579e76485 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -8,10 +8,7 @@ __modinst:
 
 include scripts/Kbuild.include
 
-#
-
-__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
-modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o)))
+modules := $(sort $(shell cat $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order))
 
 PHONY += $(modules)
 __modinst: $(modules)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index fec6ec2ffa47..6b19c1a4eae5 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -6,11 +6,12 @@
 # Stage one of module building created the following:
 # a) The individual .o files used for the module
 # b) A <module>.o file which is the .o files above linked together
-# c) A <module>.mod file in $(MODVERDIR)/, listing the name of the
-#    the preliminary <module>.o file, plus all .o files
+# c) A <module>.mod file, listing the name of the preliminary <module>.o file,
+#    plus all .o files
+# d) modules.order, which lists all the modules
 
 # Stage 2 is handled by this file and does the following
-# 1) Find all modules from the files listed in $(MODVERDIR)/
+# 1) Find all modules listed in modules.order
 # 2) modpost is then used to
 # 3)  create one <module>.mod.c file pr. module
 # 4)  create one Module.symvers file with CRC for all exported symbols
@@ -60,10 +61,12 @@ include scripts/Makefile.lib
 kernelsymfile := $(objtree)/Module.symvers
 modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
 
-# Step 1), find all modules listed in $(MODVERDIR)/
-MODLISTCMD := find $(MODVERDIR) -name '*.mod' | xargs -r grep -h '\.ko$$' | sort -u
-__modules := $(shell $(MODLISTCMD))
-modules   := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o)))
+modorder := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order
+
+# Step 1), find all modules listed in modules.order
+ifdef CONFIG_MODULES
+modules := $(sort $(shell cat $(modorder)))
+endif
 
 # Stop after building .o files if NOFINAL is set. Makes compile tests quicker
 _modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules))
@@ -84,7 +87,7 @@ MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
 
 # We can go over command line length here, so be careful.
 quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
-      cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
+      cmd_modpost = sed 's/ko$$/o/' $(modorder) | $(modpost) $(MODPOST_OPT) -s -T -
 
 PHONY += __modpost
 __modpost: $(modules:.ko=.o) FORCE
diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign
index da56aa78d245..d7325cefe709 100644
--- a/scripts/Makefile.modsign
+++ b/scripts/Makefile.modsign
@@ -8,8 +8,7 @@ __modsign:
 
 include scripts/Kbuild.include
 
-__modules := $(sort $(shell grep -h '\.ko$$' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
-modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o)))
+modules := $(sort $(shell cat modules.order))
 
 PHONY += $(modules)
 __modsign: $(modules)
diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index aab4e299d7a2..a904bf1f5e67 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -8,8 +8,7 @@
 #
 
 # Create/update the include/generated/autoksyms.h file from the list
-# of all module's needed symbols as recorded on the third line of
-# .tmp_versions/*.mod files.
+# of all module's needed symbols as recorded on the second line of *.mod files.
 #
 # For each symbol being added or removed, the corresponding dependency
 # file's timestamp is updated to force a rebuild of the affected source
@@ -47,13 +46,10 @@ cat > "$new_ksyms_file" << EOT
  */
 
 EOT
-[ "$(ls -A "$MODVERDIR")" ] &&
-for mod in "$MODVERDIR"/*.mod; do
-	sed -n -e '3{s/ /\n/g;/^$/!p;}' "$mod"
-done | sort -u |
-while read sym; do
-	echo "#define __KSYM_${sym} 1"
-done >> "$new_ksyms_file"
+sed 's/ko$/mod/' modules.order |
+xargs -n1 sed -n -e '2{s/ /\n/g;/^$/!p;}' -- |
+sort -u |
+sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$new_ksyms_file"
 
 # Special case for modversions (see modpost.c)
 if [ -n "$CONFIG_MODVERSIONS" ]; then
diff --git a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
new file mode 100644
index 000000000000..56a2e261d61d
--- /dev/null
+++ b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/// Use devm_platform_ioremap_resource helper which wraps
+/// platform_get_resource() and devm_ioremap_resource() together.
+///
+// Confidence: High
+// Copyright: (C) 2019 Himanshu Jha GPLv2.
+// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2.
+// Keywords: platform_get_resource, devm_ioremap_resource,
+// Keywords: devm_platform_ioremap_resource
+
+virtual patch
+virtual report
+
+@r depends on patch && !report@
+expression e1, e2, arg1, arg2, arg3;
+identifier id;
+@@
+
+(
+- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+|
+- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+)
+  ... when != id
+- e1 = devm_ioremap_resource(arg3, id);
++ e1 = devm_platform_ioremap_resource(arg1, arg2);
+  ... when != id
+? id = e2
+
+@r1 depends on patch && !report@
+identifier r.id;
+type T;
+@@
+
+- T *id;
+  ...when != id
+
+@r2 depends on report && !patch@
+identifier id;
+expression e1, e2, arg1, arg2, arg3;
+position j0;
+@@
+
+(
+  id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+|
+  struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
+)
+  ... when != id
+  e1@j0 = devm_ioremap_resource(arg3, id);
+  ... when != id
+? id = e2
+
+@script:python depends on report && !patch@
+e1 << r2.e1;
+j0 << r2.j0;
+@@
+
+msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1)
+coccilib.report.print_report(j0[0], msg)
diff --git a/scripts/export_report.pl b/scripts/export_report.pl
index 0f604f62f067..7d3030d03a25 100755
--- a/scripts/export_report.pl
+++ b/scripts/export_report.pl
@@ -52,13 +52,12 @@ sub usage {
 
 sub collectcfiles {
     my @file;
-    while (<.tmp_versions/*.mod>) {
-	open my $fh, '<', $_ or die "cannot open $_: $!\n";
-	push (@file,
-	      grep s/\.ko/.mod.c/,	# change the suffix
-	      grep m/.+\.ko/,		# find the .ko path
-	      <$fh>);			# lines in opened file
+    open my $fh, '< modules.order' or die "cannot open modules.order: $!\n";
+    while (<$fh>) {
+	s/\.ko$/.mod.c/;
+	push (@file, $_)
     }
+    close($fh);
     chomp @file;
     return @file;
 }
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index ab30fe724c43..7656e1137b6b 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -94,7 +94,7 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c
 %.config: $(obj)/conf
 	$(if $(call configfiles),, $(error No configuration exists for this target on this architecture))
 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles)
-	+$(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
+	$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
 PHONY += kvmconfig
 kvmconfig: kvm_guest.config
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 501fdcc5e999..1134892599da 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -895,7 +895,8 @@ int conf_write(const char *name)
 				     "# %s\n"
 				     "#\n", str);
 			need_newline = false;
-		} else if (!(sym->flags & SYMBOL_CHOICE)) {
+		} else if (!(sym->flags & SYMBOL_CHOICE) &&
+			   !(sym->flags & SYMBOL_WRITTEN)) {
 			sym_calc_value(sym);
 			if (!(sym->flags & SYMBOL_WRITE))
 				goto next;
@@ -903,7 +904,7 @@ int conf_write(const char *name)
 				fprintf(out, "\n");
 				need_newline = false;
 			}
-			sym->flags &= ~SYMBOL_WRITE;
+			sym->flags |= SYMBOL_WRITTEN;
 			conf_write_symbol(out, sym, &kconfig_printer_cb, NULL);
 		}
 
@@ -1063,8 +1064,6 @@ int conf_write_autoconf(int overwrite)
 	if (!overwrite && is_present(autoconf_name))
 		return 0;
 
-	sym_clear_all_valid();
-
 	conf_write_dep("include/config/auto.conf.cmd");
 
 	if (conf_touch_deps())
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index 8dde65bc3165..017843c9a4f4 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -141,6 +141,7 @@ struct symbol {
 #define SYMBOL_OPTIONAL   0x0100  /* choice is optional - values can be 'n' */
 #define SYMBOL_WRITE      0x0200  /* write symbol to file (KCONFIG_CONFIG) */
 #define SYMBOL_CHANGED    0x0400  /* ? */
+#define SYMBOL_WRITTEN    0x0800  /* track info to avoid double-write to .config */
 #define SYMBOL_NO_WRITE   0x1000  /* Symbol for internal use only; it will not be written */
 #define SYMBOL_CHECKED    0x2000  /* used during dependency checking */
 #define SYMBOL_WARNED     0x8000  /* warning has been issued */
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 0f6dcb4011a8..63062024ce0e 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -396,34 +396,19 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen)
 	unsigned long len;
 	struct md4_ctx md;
 	char *sources, *end, *fname;
-	const char *basename;
 	char filelist[PATH_MAX + 1];
-	char *modverdir = getenv("MODVERDIR");
 
-	if (!modverdir)
-		modverdir = ".";
-
-	/* Source files for module are in .tmp_versions/modname.mod,
-	   after the first line. */
-	if (strrchr(modname, '/'))
-		basename = strrchr(modname, '/') + 1;
-	else
-		basename = modname;
-	snprintf(filelist, sizeof(filelist), "%s/%.*s.mod", modverdir,
-		(int) strlen(basename) - 2, basename);
+	/* Objects for a module are listed on the first line of its *.mod file. */
+	snprintf(filelist, sizeof(filelist), "%.*smod",
+		 (int)strlen(modname) - 1, modname);
 
 	file = grab_file(filelist, &len);
 	if (!file)
 		/* not a module or .mod file missing - ignore */
 		return;
 
-	sources = strchr(file, '\n');
-	if (!sources) {
-		warn("malformed versions file for %s\n", modname);
-		goto release;
-	}
+	sources = file;
 
-	sources++;
 	end = strchr(sources, '\n');
 	if (!end) {
 		warn("bad ending versions file for %s\n", modname);
diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh
index 39e8cb36ba19..f51f446707b8 100755
--- a/scripts/modules-check.sh
+++ b/scripts/modules-check.sh
@@ -9,7 +9,7 @@ check_same_name_modules()
 	for m in $(sed 's:.*/::' modules.order | sort | uniq -d)
 	do
 		echo "warning: same module names found:" >&2
-		sed -n "/\/$m/s:^kernel/:  :p" modules.order >&2
+		sed -n "/\/$m/s:^:  :p" modules.order >&2
 	done
 }
 
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index e8ca6dc97e96..c4c580f547ef 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -132,6 +132,11 @@ fi
 if [ "$ARCH" != "um" ]; then
 	$MAKE -f $srctree/Makefile headers
 	$MAKE -f $srctree/Makefile headers_install INSTALL_HDR_PATH="$libc_headers_dir/usr"
+	# move asm headers to /usr/include/<libc-machine>/asm to match the structure
+	# used by Debian-based distros (to support multi-arch)
+	host_arch=$(dpkg-architecture -a$(cat debian/arch) -qDEB_HOST_MULTIARCH)
+	mkdir $libc_headers_dir/usr/include/$host_arch
+	mv $libc_headers_dir/usr/include/asm $libc_headers_dir/usr/include/$host_arch/
 fi
 
 # Install the maintainer scripts
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 8351584cb24e..e0750b70453f 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -197,6 +197,7 @@ Architecture: $debarch
 Description: Linux support headers for userspace development
  This package provides userspaces headers from the Linux kernel.  These headers
  are used by the installed headers for GNU glibc and other system libraries.
+Multi-Arch: same
 
 Package: $dbg_packagename
 Section: debug
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 2d29df4a0a53..8640c278f1aa 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -29,7 +29,7 @@ fi
 
 PROVIDES="$PROVIDES kernel-$KERNELRELEASE"
 __KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g")
-EXCLUDES="$RCS_TAR_IGNORE --exclude=.tmp_versions --exclude=*vmlinux* \
+EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \
 --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \
 --exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s"
 
diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index f230e65329a2..3b638c0e1a4f 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -83,6 +83,17 @@ sub check_missing(%)
 	foreach my $prog (sort keys %missing) {
 		my $is_optional = $missing{$prog};
 
+		# At least on some LTS distros like CentOS 7, texlive doesn't
+		# provide all packages we need. When such distros are
+		# detected, we have to disable PDF output.
+		#
+		# So, when PDF output is disabled, skip the packages that
+		# are only needed for LaTeX to work (is_optional == 2).
+		if ($is_optional == 2 && !$pdf) {
+			$optional--;
+			next;
+		}
+
 		if ($is_optional) {
 			print "Warning: better to also install \"$prog\".\n";
 		} else {
@@ -333,10 +344,13 @@ sub give_debian_hints()
 
 	if ($pdf) {
 		check_missing_file("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
-				   "fonts-dejavu", 1);
+				   "fonts-dejavu", 2);
+
+		check_missing_file("/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+				   "fonts-noto-cjk", 2);
 	}
 
-	check_program("dvipng", 1) if ($pdf);
+	check_program("dvipng", 2) if ($pdf);
 	check_missing(\%map);
 
 	return if (!$need && !$optional);
@@ -363,6 +377,7 @@ sub give_redhat_hints()
 	my @fedora_tex_pkgs = (
 		"texlive-collection-fontsrecommended",
 		"texlive-collection-latex",
+		"texlive-xecjk",
 		"dejavu-sans-fonts",
 		"dejavu-serif-fonts",
 		"dejavu-sans-mono-fonts",
@@ -371,22 +386,45 @@ sub give_redhat_hints()
 	#
 	# Checks valid for RHEL/CentOS version 7.x.
 	#
-	if (! $system_release =~ /Fedora/) {
+	my $old = 0;
+	my $rel;
+	$rel = $1 if ($system_release =~ /release\s+(\d+)/);
+
+	if (!($system_release =~ /Fedora/)) {
 		$map{"virtualenv"} = "python-virtualenv";
-	}
 
-	my $release;
+		if ($rel && $rel < 8) {
+			$old = 1;
+			$pdf = 0;
+
+			printf("Note: texlive packages on RHEL/CENTOS <= 7 are incomplete. Can't support PDF output\n");
+			printf("If you want to build PDF, please read:\n");
+			printf("\thttps://www.systutorials.com/241660/how-to-install-tex-live-on-centos-7-linux/\n");
+		}
+	} else {
+		if ($rel && $rel < 26) {
+			$old = 1;
+		}
+	}
+	if (!$rel) {
+		printf("Couldn't identify release number\n");
+		$old = 1;
+		$pdf = 0;
+	}
 
-	$release = $1 if ($system_release =~ /Fedora\s+release\s+(\d+)/);
+	if ($pdf) {
+		check_missing_file("/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+				   "google-noto-sans-cjk-ttc-fonts", 2);
+	}
 
-	check_rpm_missing(\@fedora26_opt_pkgs, 1) if ($pdf && $release >= 26);
-	check_rpm_missing(\@fedora_tex_pkgs, 1) if ($pdf);
-	check_missing_tex(1) if ($pdf);
+	check_rpm_missing(\@fedora26_opt_pkgs, 2) if ($pdf && !$old);
+	check_rpm_missing(\@fedora_tex_pkgs, 2) if ($pdf);
+	check_missing_tex(2) if ($pdf);
 	check_missing(\%map);
 
 	return if (!$need && !$optional);
 
-	if ($release >= 18) {
+	if (!$old) {
 		# dnf, for Fedora 18+
 		printf("You should run:\n\n\tsudo dnf install -y $install\n");
 	} else {
@@ -425,8 +463,15 @@ sub give_opensuse_hints()
 		"texlive-zapfding",
 	);
 
-	check_rpm_missing(\@suse_tex_pkgs, 1) if ($pdf);
-	check_missing_tex(1) if ($pdf);
+	$map{"latexmk"} = "texlive-latexmk-bin";
+
+	# FIXME: add support for installing CJK fonts
+	#
+	# I tried hard, but was unable to find a way to install
+	# "Noto Sans CJK SC" on openSUSE
+
+	check_rpm_missing(\@suse_tex_pkgs, 2) if ($pdf);
+	check_missing_tex(2) if ($pdf);
 	check_missing(\%map);
 
 	return if (!$need && !$optional);
@@ -450,7 +495,14 @@ sub give_mageia_hints()
 		"texlive-fontsextra",
 	);
 
-	check_rpm_missing(\@tex_pkgs, 1) if ($pdf);
+	$map{"latexmk"} = "texlive-collection-basic";
+
+	if ($pdf) {
+		check_missing_file("/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+				   "google-noto-sans-cjk-ttc-fonts", 2);
+	}
+
+	check_rpm_missing(\@tex_pkgs, 2) if ($pdf);
 	check_missing(\%map);
 
 	return if (!$need && !$optional);
@@ -473,7 +525,13 @@ sub give_arch_linux_hints()
 		"texlive-latexextra",
 		"ttf-dejavu",
 	);
-	check_pacman_missing(\@archlinux_tex_pkgs, 1) if ($pdf);
+	check_pacman_missing(\@archlinux_tex_pkgs, 2) if ($pdf);
+
+	if ($pdf) {
+		check_missing_file("/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+				   "noto-fonts-cjk", 2);
+	}
+
 	check_missing(\%map);
 
 	return if (!$need && !$optional);
@@ -492,15 +550,31 @@ sub give_gentoo_hints()
 	);
 
 	check_missing_file("/usr/share/fonts/dejavu/DejaVuSans.ttf",
-			   "media-fonts/dejavu", 1) if ($pdf);
+			   "media-fonts/dejavu", 2) if ($pdf);
+
+	if ($pdf) {
+		check_missing_file("/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
+				   "media-fonts/noto-cjk", 2);
+	}
 
 	check_missing(\%map);
 
 	return if (!$need && !$optional);
 
 	printf("You should run:\n\n");
-	printf("\tsudo su -c 'echo \"media-gfx/imagemagick svg png\" > /etc/portage/package.use/imagemagick'\n");
-	printf("\tsudo su -c 'echo \"media-gfx/graphviz cairo pdf\" > /etc/portage/package.use/graphviz'\n");
+
+	my $imagemagick = "media-gfx/imagemagick svg png";
+	my $cairo = "media-gfx/graphviz cairo pdf";
+	my $portage_imagemagick = "/etc/portage/package.use/imagemagick";
+	my $portage_cairo = "/etc/portage/package.use/graphviz";
+
+	if (qx(cat $portage_imagemagick) ne "$imagemagick\n") {
+		printf("\tsudo su -c 'echo \"$imagemagick\" > $portage_imagemagick'\n")
+	}
+	if (qx(cat $portage_cairo) ne  "$cairo\n") {
+		printf("\tsudo su -c 'echo \"$cairo\" > $portage_cairo'\n");
+	}
+
 	printf("\tsudo emerge --ask $install\n");
 
 }
@@ -560,7 +634,7 @@ sub check_distros()
 	my %map = (
 		"sphinx-build" => "sphinx"
 	);
-	check_missing_tex(1) if ($pdf);
+	check_missing_tex(2) if ($pdf);
 	check_missing(\%map);
 	print "I don't know distro $system_release.\n";
 	print "So, I can't provide you a hint with the install procedure.\n";
@@ -589,11 +663,13 @@ sub check_needs()
 	check_program("make", 0);
 	check_program("gcc", 0);
 	check_python_module("sphinx_rtd_theme", 1) if (!$virtualenv);
-	check_program("xelatex", 1) if ($pdf);
 	check_program("dot", 1);
 	check_program("convert", 1);
-	check_program("rsvg-convert", 1) if ($pdf);
-	check_program("latexmk", 1) if ($pdf);
+
+	# Programs needed only for PDF output - should use 2 for is_optional
+	check_program("xelatex", 2) if ($pdf);
+	check_program("rsvg-convert", 2) if ($pdf);
+	check_program("latexmk", 2) if ($pdf);
 
 	check_distros();
 
diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c
index e63a90ff2728..1f0a6eaa2d6a 100644
--- a/security/selinux/ss/sidtab.c
+++ b/security/selinux/ss/sidtab.c
@@ -286,6 +286,11 @@ static int sidtab_reverse_lookup(struct sidtab *s, struct context *context,
 		++count;
 	}
 
+	/* bail out if we already reached max entries */
+	rc = -EOVERFLOW;
+	if (count >= SIDTAB_MAX)
+		goto out_unlock;
+
 	/* insert context into new entry */
 	rc = -ENOMEM;
 	dst = sidtab_do_lookup(s, count, 1);
diff --git a/sound/ac97/bus.c b/sound/ac97/bus.c
index 7b977b753a03..7985dd8198b6 100644
--- a/sound/ac97/bus.c
+++ b/sound/ac97/bus.c
@@ -122,17 +122,12 @@ static int ac97_codec_add(struct ac97_controller *ac97_ctrl, int idx,
 						      vendor_id);
 
 	ret = device_add(&codec->dev);
-	if (ret)
-		goto err_free_codec;
+	if (ret) {
+		put_device(&codec->dev);
+		return ret;
+	}
 
 	return 0;
-err_free_codec:
-	of_node_put(codec->dev.of_node);
-	put_device(&codec->dev);
-	kfree(codec);
-	ac97_ctrl->codecs[idx] = NULL;
-
-	return ret;
 }
 
 unsigned int snd_ac97_bus_scan_one(struct ac97_controller *adrv,
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 99b882158705..41905afada63 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -574,10 +574,7 @@ snd_compr_set_params(struct snd_compr_stream *stream, unsigned long arg)
 		stream->metadata_set = false;
 		stream->next_track = false;
 
-		if (stream->direction == SND_COMPRESS_PLAYBACK)
-			stream->runtime->state = SNDRV_PCM_STATE_SETUP;
-		else
-			stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+		stream->runtime->state = SNDRV_PCM_STATE_SETUP;
 	} else {
 		return -EPERM;
 	}
@@ -693,8 +690,17 @@ static int snd_compr_start(struct snd_compr_stream *stream)
 {
 	int retval;
 
-	if (stream->runtime->state != SNDRV_PCM_STATE_PREPARED)
+	switch (stream->runtime->state) {
+	case SNDRV_PCM_STATE_SETUP:
+		if (stream->direction != SND_COMPRESS_CAPTURE)
+			return -EPERM;
+		break;
+	case SNDRV_PCM_STATE_PREPARED:
+		break;
+	default:
 		return -EPERM;
+	}
+
 	retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_START);
 	if (!retval)
 		stream->runtime->state = SNDRV_PCM_STATE_RUNNING;
@@ -705,9 +711,15 @@ static int snd_compr_stop(struct snd_compr_stream *stream)
 {
 	int retval;
 
-	if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED ||
-			stream->runtime->state == SNDRV_PCM_STATE_SETUP)
+	switch (stream->runtime->state) {
+	case SNDRV_PCM_STATE_OPEN:
+	case SNDRV_PCM_STATE_SETUP:
+	case SNDRV_PCM_STATE_PREPARED:
 		return -EPERM;
+	default:
+		break;
+	}
+
 	retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_STOP);
 	if (!retval) {
 		snd_compr_drain_notify(stream);
@@ -795,9 +807,17 @@ static int snd_compr_drain(struct snd_compr_stream *stream)
 {
 	int retval;
 
-	if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED ||
-			stream->runtime->state == SNDRV_PCM_STATE_SETUP)
+	switch (stream->runtime->state) {
+	case SNDRV_PCM_STATE_OPEN:
+	case SNDRV_PCM_STATE_SETUP:
+	case SNDRV_PCM_STATE_PREPARED:
+	case SNDRV_PCM_STATE_PAUSED:
 		return -EPERM;
+	case SNDRV_PCM_STATE_XRUN:
+		return -EPIPE;
+	default:
+		break;
+	}
 
 	retval = stream->ops->trigger(stream, SND_COMPR_TRIGGER_DRAIN);
 	if (retval) {
@@ -817,6 +837,10 @@ static int snd_compr_next_track(struct snd_compr_stream *stream)
 	if (stream->runtime->state != SNDRV_PCM_STATE_RUNNING)
 		return -EPERM;
 
+	/* next track doesn't have any meaning for capture streams */
+	if (stream->direction == SND_COMPRESS_CAPTURE)
+		return -EPERM;
+
 	/* you can signal next track if this is intended to be a gapless stream
 	 * and current track metadata is set
 	 */
@@ -834,9 +858,23 @@ static int snd_compr_next_track(struct snd_compr_stream *stream)
 static int snd_compr_partial_drain(struct snd_compr_stream *stream)
 {
 	int retval;
-	if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED ||
-			stream->runtime->state == SNDRV_PCM_STATE_SETUP)
+
+	switch (stream->runtime->state) {
+	case SNDRV_PCM_STATE_OPEN:
+	case SNDRV_PCM_STATE_SETUP:
+	case SNDRV_PCM_STATE_PREPARED:
+	case SNDRV_PCM_STATE_PAUSED:
+		return -EPERM;
+	case SNDRV_PCM_STATE_XRUN:
+		return -EPIPE;
+	default:
+		break;
+	}
+
+	/* partial drain doesn't have any meaning for capture streams */
+	if (stream->direction == SND_COMPRESS_CAPTURE)
 		return -EPERM;
+
 	/* stream can be drained only when next track has been signalled */
 	if (stream->next_track == false)
 		return -EPERM;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 860543a4c840..12dd9b318db1 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -77,7 +77,7 @@ void snd_pcm_group_init(struct snd_pcm_group *group)
 	spin_lock_init(&group->lock);
 	mutex_init(&group->mutex);
 	INIT_LIST_HEAD(&group->substreams);
-	refcount_set(&group->refs, 0);
+	refcount_set(&group->refs, 1);
 }
 
 /* define group lock helpers */
@@ -1096,8 +1096,7 @@ static void snd_pcm_group_unref(struct snd_pcm_group *group,
 
 	if (!group)
 		return;
-	do_free = refcount_dec_and_test(&group->refs) &&
-		list_empty(&group->substreams);
+	do_free = refcount_dec_and_test(&group->refs);
 	snd_pcm_group_unlock(group, substream->pcm->nonatomic);
 	if (do_free)
 		kfree(group);
@@ -2020,6 +2019,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 	snd_pcm_group_lock_irq(target_group, nonatomic);
 	snd_pcm_stream_lock(substream1);
 	snd_pcm_group_assign(substream1, target_group);
+	refcount_inc(&target_group->refs);
 	snd_pcm_stream_unlock(substream1);
 	snd_pcm_group_unlock_irq(target_group, nonatomic);
  _end:
@@ -2056,13 +2056,14 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream)
 	snd_pcm_group_lock_irq(group, nonatomic);
 
 	relink_to_local(substream);
+	refcount_dec(&group->refs);
 
 	/* detach the last stream, too */
 	if (list_is_singular(&group->substreams)) {
 		relink_to_local(list_first_entry(&group->substreams,
 						 struct snd_pcm_substream,
 						 link_list));
-		do_free = !refcount_read(&group->refs);
+		do_free = refcount_dec_and_test(&group->refs);
 	}
 
 	snd_pcm_group_unlock_irq(group, nonatomic);
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index e30e86ca6b72..51f10ed9bc43 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2942,7 +2942,7 @@ static int hda_codec_runtime_resume(struct device *dev)
 static int hda_codec_force_resume(struct device *dev)
 {
 	struct hda_codec *codec = dev_to_hda_codec(dev);
-	bool forced_resume = !codec->relaxed_resume;
+	bool forced_resume = !codec->relaxed_resume && codec->jacktbl.used;
 	int ret;
 
 	/* The get/put pair below enforces the runtime resume even if the
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index cb8b0945547c..1e14d7270adf 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -313,11 +313,10 @@ enum {
 
 #define AZX_DCAPS_INTEL_SKYLAKE \
 	(AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\
+	 AZX_DCAPS_SYNC_WRITE |\
 	 AZX_DCAPS_SEPARATE_STREAM_TAG | AZX_DCAPS_I915_COMPONENT)
 
-#define AZX_DCAPS_INTEL_BROXTON \
-	(AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\
-	 AZX_DCAPS_SEPARATE_STREAM_TAG | AZX_DCAPS_I915_COMPONENT)
+#define AZX_DCAPS_INTEL_BROXTON		AZX_DCAPS_INTEL_SKYLAKE
 
 /* quirks for ATI SB / AMD Hudson */
 #define AZX_DCAPS_PRESET_ATI_SB \
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 4f8d0845ee1e..f299f137eaea 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1083,6 +1083,7 @@ static int patch_conexant_auto(struct hda_codec *codec)
  */
 
 static const struct hda_device_id snd_hda_id_conexant[] = {
+	HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c
index f0662bd4e50f..27bf61c177c0 100644
--- a/sound/usb/line6/podhd.c
+++ b/sound/usb/line6/podhd.c
@@ -368,7 +368,7 @@ static const struct line6_properties podhd_properties_table[] = {
 		.name = "POD HD500",
 		.capabilities	= LINE6_CAP_PCM
 				| LINE6_CAP_HWMON,
-		.altsetting = 1,
+		.altsetting = 0,
 		.ep_ctrl_r = 0x81,
 		.ep_ctrl_w = 0x01,
 		.ep_audio_r = 0x86,
diff --git a/sound/usb/line6/variax.c b/sound/usb/line6/variax.c
index 0d24c72c155f..ed158f04de80 100644
--- a/sound/usb/line6/variax.c
+++ b/sound/usb/line6/variax.c
@@ -244,5 +244,5 @@ static struct usb_driver variax_driver = {
 
 module_usb_driver(variax_driver);
 
-MODULE_DESCRIPTION("Vairax Workbench USB driver");
+MODULE_DESCRIPTION("Variax Workbench USB driver");
 MODULE_LICENSE("GPL");
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index c2152f3dd02d..e7c67be7c15f 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
-	 * For ARM: See Documentation/virtual/kvm/api.txt
+	 * For ARM: See Documentation/virt/kvm/api.txt
 	 */
 	union {
 		__u32 irq;
@@ -1085,7 +1085,7 @@ struct kvm_xen_hvm_config {
  *
  * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
  * the irqfd to operate in resampling mode for level triggered interrupt
- * emulation.  See Documentation/virtual/kvm/api.txt.
+ * emulation.  See Documentation/virt/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 580e344db3dd..ced3765c4f44 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -11,22 +11,24 @@
 #include "elf.h"
 #include "cfi.h"
 
-#define INSN_JUMP_CONDITIONAL	1
-#define INSN_JUMP_UNCONDITIONAL	2
-#define INSN_JUMP_DYNAMIC	3
-#define INSN_CALL		4
-#define INSN_CALL_DYNAMIC	5
-#define INSN_RETURN		6
-#define INSN_CONTEXT_SWITCH	7
-#define INSN_STACK		8
-#define INSN_BUG		9
-#define INSN_NOP		10
-#define INSN_STAC		11
-#define INSN_CLAC		12
-#define INSN_STD		13
-#define INSN_CLD		14
-#define INSN_OTHER		15
-#define INSN_LAST		INSN_OTHER
+enum insn_type {
+	INSN_JUMP_CONDITIONAL,
+	INSN_JUMP_UNCONDITIONAL,
+	INSN_JUMP_DYNAMIC,
+	INSN_JUMP_DYNAMIC_CONDITIONAL,
+	INSN_CALL,
+	INSN_CALL_DYNAMIC,
+	INSN_RETURN,
+	INSN_CONTEXT_SWITCH,
+	INSN_STACK,
+	INSN_BUG,
+	INSN_NOP,
+	INSN_STAC,
+	INSN_CLAC,
+	INSN_STD,
+	INSN_CLD,
+	INSN_OTHER,
+};
 
 enum op_dest_type {
 	OP_DEST_REG,
@@ -68,7 +70,7 @@ void arch_initial_func_cfi_state(struct cfi_state *state);
 
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, unsigned char *type,
+			    unsigned int *len, enum insn_type *type,
 			    unsigned long *immediate, struct stack_op *op);
 
 bool arch_callee_saved_reg(unsigned char reg);
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 584568f27a83..0567c47a91b1 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -68,7 +68,7 @@ bool arch_callee_saved_reg(unsigned char reg)
 
 int arch_decode_instruction(struct elf *elf, struct section *sec,
 			    unsigned long offset, unsigned int maxlen,
-			    unsigned int *len, unsigned char *type,
+			    unsigned int *len, enum insn_type *type,
 			    unsigned long *immediate, struct stack_op *op)
 {
 	struct insn insn;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 172f99195726..5f26620f13f5 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -18,6 +18,8 @@
 
 #define FAKE_JUMP_OFFSET -1
 
+#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+
 struct alternative {
 	struct list_head list;
 	struct instruction *insn;
@@ -95,6 +97,20 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
+static bool is_sibling_call(struct instruction *insn)
+{
+	/* An indirect jump is either a sibling call or a jump to a table. */
+	if (insn->type == INSN_JUMP_DYNAMIC)
+		return list_empty(&insn->alts);
+
+	if (insn->type != INSN_JUMP_CONDITIONAL &&
+	    insn->type != INSN_JUMP_UNCONDITIONAL)
+		return false;
+
+	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
+	return !!insn->call_dest;
+}
+
 /*
  * This checks to see if the given function is a "noreturn" function.
  *
@@ -103,14 +119,9 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
  *
  * For local functions, we have to detect them manually by simply looking for
  * the lack of a return instruction.
- *
- * Returns:
- *  -1: error
- *   0: no dead end
- *   1: dead end
  */
-static int __dead_end_function(struct objtool_file *file, struct symbol *func,
-			       int recursion)
+static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
+				int recursion)
 {
 	int i;
 	struct instruction *insn;
@@ -136,30 +147,33 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"rewind_stack_do_exit",
 	};
 
+	if (!func)
+		return false;
+
 	if (func->bind == STB_WEAK)
-		return 0;
+		return false;
 
 	if (func->bind == STB_GLOBAL)
 		for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
 			if (!strcmp(func->name, global_noreturns[i]))
-				return 1;
+				return true;
 
 	if (!func->len)
-		return 0;
+		return false;
 
 	insn = find_insn(file, func->sec, func->offset);
 	if (!insn->func)
-		return 0;
+		return false;
 
 	func_for_each_insn_all(file, func, insn) {
 		empty = false;
 
 		if (insn->type == INSN_RETURN)
-			return 0;
+			return false;
 	}
 
 	if (empty)
-		return 0;
+		return false;
 
 	/*
 	 * A function can have a sibling call instead of a return.  In that
@@ -167,40 +181,31 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 	 * of the sibling call returns.
 	 */
 	func_for_each_insn_all(file, func, insn) {
-		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+		if (is_sibling_call(insn)) {
 			struct instruction *dest = insn->jump_dest;
 
 			if (!dest)
 				/* sibling call to another file */
-				return 0;
-
-			if (dest->func && dest->func->pfunc != insn->func->pfunc) {
+				return false;
 
-				/* local sibling call */
-				if (recursion == 5) {
-					/*
-					 * Infinite recursion: two functions
-					 * have sibling calls to each other.
-					 * This is a very rare case.  It means
-					 * they aren't dead ends.
-					 */
-					return 0;
-				}
-
-				return __dead_end_function(file, dest->func,
-							   recursion + 1);
+			/* local sibling call */
+			if (recursion == 5) {
+				/*
+				 * Infinite recursion: two functions have
+				 * sibling calls to each other.  This is a very
+				 * rare case.  It means they aren't dead ends.
+				 */
+				return false;
 			}
-		}
 
-		if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
-			/* sibling call */
-			return 0;
+			return __dead_end_function(file, dest->func, recursion+1);
+		}
 	}
 
-	return 1;
+	return true;
 }
 
-static int dead_end_function(struct objtool_file *file, struct symbol *func)
+static bool dead_end_function(struct objtool_file *file, struct symbol *func)
 {
 	return __dead_end_function(file, func, 0);
 }
@@ -262,19 +267,12 @@ static int decode_instructions(struct objtool_file *file)
 			if (ret)
 				goto err;
 
-			if (!insn->type || insn->type > INSN_LAST) {
-				WARN_FUNC("invalid instruction type %d",
-					  insn->sec, insn->offset, insn->type);
-				ret = -1;
-				goto err;
-			}
-
 			hash_add(file->insn_hash, &insn->hash, insn->offset);
 			list_add_tail(&insn->list, &file->insn_list);
 		}
 
 		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC)
+			if (func->type != STT_FUNC || func->alias != func)
 				continue;
 
 			if (!find_insn(file, sec, func->offset)) {
@@ -284,8 +282,7 @@ static int decode_instructions(struct objtool_file *file)
 			}
 
 			func_for_each_insn(file, func, insn)
-				if (!insn->func)
-					insn->func = func;
+				insn->func = func;
 		}
 	}
 
@@ -488,6 +485,7 @@ static const char *uaccess_safe_builtin[] = {
 	/* misc */
 	"csum_partial_copy_generic",
 	"__memcpy_mcsafe",
+	"mcsafe_handle_tail",
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	NULL
 };
@@ -505,7 +503,7 @@ static void add_uaccess_safe(struct objtool_file *file)
 		if (!func)
 			continue;
 
-		func->alias->uaccess_safe = true;
+		func->uaccess_safe = true;
 	}
 }
 
@@ -577,13 +575,16 @@ static int add_jump_destinations(struct objtool_file *file)
 			 * Retpoline jumps are really dynamic jumps in
 			 * disguise, so convert them accordingly.
 			 */
-			insn->type = INSN_JUMP_DYNAMIC;
+			if (insn->type == INSN_JUMP_UNCONDITIONAL)
+				insn->type = INSN_JUMP_DYNAMIC;
+			else
+				insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+
 			insn->retpoline_safe = true;
 			continue;
 		} else {
-			/* sibling call */
+			/* external sibling call */
 			insn->call_dest = rela->sym;
-			insn->jump_dest = NULL;
 			continue;
 		}
 
@@ -623,7 +624,7 @@ static int add_jump_destinations(struct objtool_file *file)
 			 * However this code can't completely replace the
 			 * read_symbols() code because this doesn't detect the
 			 * case where the parent function's only reference to a
-			 * subfunction is through a switch table.
+			 * subfunction is through a jump table.
 			 */
 			if (!strstr(insn->func->name, ".cold.") &&
 			    strstr(insn->jump_dest->func->name, ".cold.")) {
@@ -633,9 +634,8 @@ static int add_jump_destinations(struct objtool_file *file)
 			} else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
 				   insn->jump_dest->offset == insn->jump_dest->func->offset) {
 
-				/* sibling class */
+				/* internal sibling call */
 				insn->call_dest = insn->jump_dest->func;
-				insn->jump_dest = NULL;
 			}
 		}
 	}
@@ -896,20 +896,26 @@ out:
 	return ret;
 }
 
-static int add_switch_table(struct objtool_file *file, struct instruction *insn,
-			    struct rela *table, struct rela *next_table)
+static int add_jump_table(struct objtool_file *file, struct instruction *insn,
+			    struct rela *table)
 {
 	struct rela *rela = table;
-	struct instruction *alt_insn;
+	struct instruction *dest_insn;
 	struct alternative *alt;
 	struct symbol *pfunc = insn->func->pfunc;
 	unsigned int prev_offset = 0;
 
-	list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
-		if (rela == next_table)
+	/*
+	 * Each @rela is a switch table relocation which points to the target
+	 * instruction.
+	 */
+	list_for_each_entry_from(rela, &table->sec->rela_list, list) {
+
+		/* Check for the end of the table: */
+		if (rela != table && rela->jump_table_start)
 			break;
 
-		/* Make sure the switch table entries are consecutive: */
+		/* Make sure the table entries are consecutive: */
 		if (prev_offset && rela->offset != prev_offset + 8)
 			break;
 
@@ -918,12 +924,12 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 		    rela->addend == pfunc->offset)
 			break;
 
-		alt_insn = find_insn(file, rela->sym->sec, rela->addend);
-		if (!alt_insn)
+		dest_insn = find_insn(file, rela->sym->sec, rela->addend);
+		if (!dest_insn)
 			break;
 
-		/* Make sure the jmp dest is in the function or subfunction: */
-		if (alt_insn->func->pfunc != pfunc)
+		/* Make sure the destination is in the same function: */
+		if (!dest_insn->func || dest_insn->func->pfunc != pfunc)
 			break;
 
 		alt = malloc(sizeof(*alt));
@@ -932,7 +938,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 			return -1;
 		}
 
-		alt->insn = alt_insn;
+		alt->insn = dest_insn;
 		list_add_tail(&alt->list, &insn->alts);
 		prev_offset = rela->offset;
 	}
@@ -947,7 +953,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 }
 
 /*
- * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * find_jump_table() - Given a dynamic jump, find the switch jump table in
  * .rodata associated with it.
  *
  * There are 3 basic patterns:
@@ -989,13 +995,13 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
  *
  *    NOTE: RETPOLINE made it harder still to decode dynamic jumps.
  */
-static struct rela *find_switch_table(struct objtool_file *file,
+static struct rela *find_jump_table(struct objtool_file *file,
 				      struct symbol *func,
 				      struct instruction *insn)
 {
-	struct rela *text_rela, *rodata_rela;
+	struct rela *text_rela, *table_rela;
 	struct instruction *orig_insn = insn;
-	struct section *rodata_sec;
+	struct section *table_sec;
 	unsigned long table_offset;
 
 	/*
@@ -1028,42 +1034,52 @@ static struct rela *find_switch_table(struct objtool_file *file,
 			continue;
 
 		table_offset = text_rela->addend;
-		rodata_sec = text_rela->sym->sec;
+		table_sec = text_rela->sym->sec;
 
 		if (text_rela->type == R_X86_64_PC32)
 			table_offset += 4;
 
 		/*
 		 * Make sure the .rodata address isn't associated with a
-		 * symbol.  gcc jump tables are anonymous data.
+		 * symbol.  GCC jump tables are anonymous data.
+		 *
+		 * Also support C jump tables which are in the same format as
+		 * switch jump tables.  For objtool to recognize them, they
+		 * need to be placed in the C_JUMP_TABLE_SECTION section.  They
+		 * have symbols associated with them.
 		 */
-		if (find_symbol_containing(rodata_sec, table_offset))
+		if (find_symbol_containing(table_sec, table_offset) &&
+		    strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
 			continue;
 
-		rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
-		if (rodata_rela) {
-			/*
-			 * Use of RIP-relative switch jumps is quite rare, and
-			 * indicates a rare GCC quirk/bug which can leave dead
-			 * code behind.
-			 */
-			if (text_rela->type == R_X86_64_PC32)
-				file->ignore_unreachables = true;
+		/* Each table entry has a rela associated with it. */
+		table_rela = find_rela_by_dest(table_sec, table_offset);
+		if (!table_rela)
+			continue;
 
-			return rodata_rela;
-		}
+		/*
+		 * Use of RIP-relative switch jumps is quite rare, and
+		 * indicates a rare GCC quirk/bug which can leave dead code
+		 * behind.
+		 */
+		if (text_rela->type == R_X86_64_PC32)
+			file->ignore_unreachables = true;
+
+		return table_rela;
 	}
 
 	return NULL;
 }
 
-
-static int add_func_switch_tables(struct objtool_file *file,
-				  struct symbol *func)
+/*
+ * First pass: Mark the head of each jump table so that in the next pass,
+ * we know when a given jump table ends and the next one starts.
+ */
+static void mark_func_jump_tables(struct objtool_file *file,
+				    struct symbol *func)
 {
-	struct instruction *insn, *last = NULL, *prev_jump = NULL;
-	struct rela *rela, *prev_rela = NULL;
-	int ret;
+	struct instruction *insn, *last = NULL;
+	struct rela *rela;
 
 	func_for_each_insn_all(file, func, insn) {
 		if (!last)
@@ -1071,7 +1087,7 @@ static int add_func_switch_tables(struct objtool_file *file,
 
 		/*
 		 * Store back-pointers for unconditional forward jumps such
-		 * that find_switch_table() can back-track using those and
+		 * that find_jump_table() can back-track using those and
 		 * avoid some potentially confusing code.
 		 */
 		if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
@@ -1086,27 +1102,25 @@ static int add_func_switch_tables(struct objtool_file *file,
 		if (insn->type != INSN_JUMP_DYNAMIC)
 			continue;
 
-		rela = find_switch_table(file, func, insn);
-		if (!rela)
-			continue;
-
-		/*
-		 * We found a switch table, but we don't know yet how big it
-		 * is.  Don't add it until we reach the end of the function or
-		 * the beginning of another switch table in the same function.
-		 */
-		if (prev_jump) {
-			ret = add_switch_table(file, prev_jump, prev_rela, rela);
-			if (ret)
-				return ret;
+		rela = find_jump_table(file, func, insn);
+		if (rela) {
+			rela->jump_table_start = true;
+			insn->jump_table = rela;
 		}
-
-		prev_jump = insn;
-		prev_rela = rela;
 	}
+}
+
+static int add_func_jump_tables(struct objtool_file *file,
+				  struct symbol *func)
+{
+	struct instruction *insn;
+	int ret;
 
-	if (prev_jump) {
-		ret = add_switch_table(file, prev_jump, prev_rela, NULL);
+	func_for_each_insn_all(file, func, insn) {
+		if (!insn->jump_table)
+			continue;
+
+		ret = add_jump_table(file, insn, insn->jump_table);
 		if (ret)
 			return ret;
 	}
@@ -1119,7 +1133,7 @@ static int add_func_switch_tables(struct objtool_file *file,
  * section which contains a list of addresses within the function to jump to.
  * This finds these jump tables and adds them to the insn->alts lists.
  */
-static int add_switch_table_alts(struct objtool_file *file)
+static int add_jump_table_alts(struct objtool_file *file)
 {
 	struct section *sec;
 	struct symbol *func;
@@ -1133,7 +1147,8 @@ static int add_switch_table_alts(struct objtool_file *file)
 			if (func->type != STT_FUNC)
 				continue;
 
-			ret = add_func_switch_tables(file, func);
+			mark_func_jump_tables(file, func);
+			ret = add_func_jump_tables(file, func);
 			if (ret)
 				return ret;
 		}
@@ -1277,13 +1292,18 @@ static void mark_rodata(struct objtool_file *file)
 	bool found = false;
 
 	/*
-	 * This searches for the .rodata section or multiple .rodata.func_name
-	 * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8
-	 * rodata sections are ignored as they don't contain jump tables.
+	 * Search for the following rodata sections, each of which can
+	 * potentially contain jump tables:
+	 *
+	 * - .rodata: can contain GCC switch tables
+	 * - .rodata.<func>: same, if -fdata-sections is being used
+	 * - .rodata..c_jump_table: contains C annotated jump tables
+	 *
+	 * .rodata.str1.* sections are ignored; they don't contain jump tables.
 	 */
 	for_each_sec(file, sec) {
-		if (!strncmp(sec->name, ".rodata", 7) &&
-		    !strstr(sec->name, ".str1.")) {
+		if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) ||
+		    !strcmp(sec->name, C_JUMP_TABLE_SECTION)) {
 			sec->rodata = true;
 			found = true;
 		}
@@ -1325,7 +1345,7 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
-	ret = add_switch_table_alts(file);
+	ret = add_jump_table_alts(file);
 	if (ret)
 		return ret;
 
@@ -1873,12 +1893,12 @@ static bool insn_state_match(struct instruction *insn, struct insn_state *state)
 static inline bool func_uaccess_safe(struct symbol *func)
 {
 	if (func)
-		return func->alias->uaccess_safe;
+		return func->uaccess_safe;
 
 	return false;
 }
 
-static inline const char *insn_dest_name(struct instruction *insn)
+static inline const char *call_dest_name(struct instruction *insn)
 {
 	if (insn->call_dest)
 		return insn->call_dest->name;
@@ -1890,13 +1910,13 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
 {
 	if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
 		WARN_FUNC("call to %s() with UACCESS enabled",
-				insn->sec, insn->offset, insn_dest_name(insn));
+				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
 	}
 
 	if (state->df) {
 		WARN_FUNC("call to %s() with DF set",
-				insn->sec, insn->offset, insn_dest_name(insn));
+				insn->sec, insn->offset, call_dest_name(insn));
 		return 1;
 	}
 
@@ -1920,13 +1940,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
  * each instruction and validate all the rules described in
  * tools/objtool/Documentation/stack-validation.txt.
  */
-static int validate_branch(struct objtool_file *file, struct instruction *first,
-			   struct insn_state state)
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+			   struct instruction *first, struct insn_state state)
 {
 	struct alternative *alt;
 	struct instruction *insn, *next_insn;
 	struct section *sec;
-	struct symbol *func = NULL;
 	int ret;
 
 	insn = first;
@@ -1947,9 +1966,6 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 			return 1;
 		}
 
-		if (insn->func)
-			func = insn->func->pfunc;
-
 		if (func && insn->ignore) {
 			WARN_FUNC("BUG: why am I validating an ignored function?",
 				  sec, insn->offset);
@@ -1971,7 +1987,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 
 				i = insn;
 				save_insn = NULL;
-				func_for_each_insn_continue_reverse(file, insn->func, i) {
+				func_for_each_insn_continue_reverse(file, func, i) {
 					if (i->save) {
 						save_insn = i;
 						break;
@@ -2017,7 +2033,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 				if (alt->skip_orig)
 					skip_orig = true;
 
-				ret = validate_branch(file, alt->insn, state);
+				ret = validate_branch(file, func, alt->insn, state);
 				if (ret) {
 					if (backtrace)
 						BT_FUNC("(alt)", insn);
@@ -2055,7 +2071,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 
 			if (state.bp_scratch) {
 				WARN("%s uses BP as a scratch register",
-				     insn->func->name);
+				     func->name);
 				return 1;
 			}
 
@@ -2067,36 +2083,28 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 			if (ret)
 				return ret;
 
-			if (insn->type == INSN_CALL) {
-				if (is_fentry_call(insn))
-					break;
-
-				ret = dead_end_function(file, insn->call_dest);
-				if (ret == 1)
-					return 0;
-				if (ret == -1)
-					return 1;
-			}
-
-			if (!no_fp && func && !has_valid_stack_frame(&state)) {
+			if (!no_fp && func && !is_fentry_call(insn) &&
+			    !has_valid_stack_frame(&state)) {
 				WARN_FUNC("call without frame pointer save/setup",
 					  sec, insn->offset);
 				return 1;
 			}
+
+			if (dead_end_function(file, insn->call_dest))
+				return 0;
+
 			break;
 
 		case INSN_JUMP_CONDITIONAL:
 		case INSN_JUMP_UNCONDITIONAL:
-			if (func && !insn->jump_dest) {
+			if (func && is_sibling_call(insn)) {
 				ret = validate_sibling_call(insn, &state);
 				if (ret)
 					return ret;
 
-			} else if (insn->jump_dest &&
-				   (!func || !insn->jump_dest->func ||
-				    insn->jump_dest->func->pfunc == func)) {
-				ret = validate_branch(file, insn->jump_dest,
-						      state);
+			} else if (insn->jump_dest) {
+				ret = validate_branch(file, func,
+						      insn->jump_dest, state);
 				if (ret) {
 					if (backtrace)
 						BT_FUNC("(branch)", insn);
@@ -2110,13 +2118,17 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 			break;
 
 		case INSN_JUMP_DYNAMIC:
-			if (func && list_empty(&insn->alts)) {
+		case INSN_JUMP_DYNAMIC_CONDITIONAL:
+			if (func && is_sibling_call(insn)) {
 				ret = validate_sibling_call(insn, &state);
 				if (ret)
 					return ret;
 			}
 
-			return 0;
+			if (insn->type == INSN_JUMP_DYNAMIC)
+				return 0;
+
+			break;
 
 		case INSN_CONTEXT_SWITCH:
 			if (func && (!next_insn || !next_insn->hint)) {
@@ -2162,7 +2174,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 			break;
 
 		case INSN_CLAC:
-			if (!state.uaccess && insn->func) {
+			if (!state.uaccess && func) {
 				WARN_FUNC("redundant UACCESS disable", sec, insn->offset);
 				return 1;
 			}
@@ -2183,7 +2195,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 			break;
 
 		case INSN_CLD:
-			if (!state.df && insn->func)
+			if (!state.df && func)
 				WARN_FUNC("redundant CLD", sec, insn->offset);
 
 			state.df = false;
@@ -2222,7 +2234,7 @@ static int validate_unwind_hints(struct objtool_file *file)
 
 	for_each_insn(file, insn) {
 		if (insn->hint && !insn->visited) {
-			ret = validate_branch(file, insn, state);
+			ret = validate_branch(file, insn->func, insn, state);
 			if (ret && backtrace)
 				BT_FUNC("<=== (hint)", insn);
 			warnings += ret;
@@ -2345,16 +2357,25 @@ static int validate_functions(struct objtool_file *file)
 
 	for_each_sec(file, sec) {
 		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC || func->pfunc != func)
+			if (func->type != STT_FUNC)
+				continue;
+
+			if (!func->len) {
+				WARN("%s() is missing an ELF size annotation",
+				     func->name);
+				warnings++;
+			}
+
+			if (func->pfunc != func || func->alias != func)
 				continue;
 
 			insn = find_insn(file, sec, func->offset);
-			if (!insn || insn->ignore)
+			if (!insn || insn->ignore || insn->visited)
 				continue;
 
-			state.uaccess = func->alias->uaccess_safe;
+			state.uaccess = func->uaccess_safe;
 
-			ret = validate_branch(file, insn, state);
+			ret = validate_branch(file, func, insn, state);
 			if (ret && backtrace)
 				BT_FUNC("<=== (func)", insn);
 			warnings += ret;
@@ -2407,7 +2428,7 @@ int check(const char *_objname, bool orc)
 
 	objname = _objname;
 
-	file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
+	file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY);
 	if (!file.elf)
 		return 1;
 
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index cb60b9acf5cf..b881fafcf55d 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -31,13 +31,14 @@ struct instruction {
 	struct section *sec;
 	unsigned long offset;
 	unsigned int len;
-	unsigned char type;
+	enum insn_type type;
 	unsigned long immediate;
 	bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
 	bool retpoline_safe;
 	struct symbol *call_dest;
 	struct instruction *jump_dest;
 	struct instruction *first_jump_src;
+	struct rela *jump_table;
 	struct list_head alts;
 	struct symbol *func;
 	struct stack_op stack_op;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index e99e1be19ad9..edba4745f25a 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -278,7 +278,7 @@ static int read_symbols(struct elf *elf)
 			}
 
 			if (sym->offset == s->offset) {
-				if (sym->len == s->len && alias == sym)
+				if (sym->len && sym->len == s->len && alias == sym)
 					alias = s;
 
 				if (sym->len >= s->len) {
@@ -385,7 +385,7 @@ static int read_relas(struct elf *elf)
 			rela->offset = rela->rela.r_offset;
 			symndx = GELF_R_SYM(rela->rela.r_info);
 			rela->sym = find_symbol_by_index(elf, symndx);
-			rela->rela_sec = sec;
+			rela->sec = sec;
 			if (!rela->sym) {
 				WARN("can't find rela entry symbol %d for %s",
 				     symndx, sec->name);
@@ -401,7 +401,7 @@ static int read_relas(struct elf *elf)
 	return 0;
 }
 
-struct elf *elf_open(const char *name, int flags)
+struct elf *elf_read(const char *name, int flags)
 {
 	struct elf *elf;
 	Elf_Cmd cmd;
@@ -463,7 +463,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 {
 	struct section *sec, *shstrtab;
 	size_t size = entsize * nr;
-	struct Elf_Scn *s;
+	Elf_Scn *s;
 	Elf_Data *data;
 
 	sec = malloc(sizeof(*sec));
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index e44ca5d51871..44150204db4d 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -57,11 +57,12 @@ struct rela {
 	struct list_head list;
 	struct hlist_node hash;
 	GElf_Rela rela;
-	struct section *rela_sec;
+	struct section *sec;
 	struct symbol *sym;
 	unsigned int type;
 	unsigned long offset;
 	int addend;
+	bool jump_table_start;
 };
 
 struct elf {
@@ -74,7 +75,7 @@ struct elf {
 };
 
 
-struct elf *elf_open(const char *name, int flags);
+struct elf *elf_read(const char *name, int flags);
 struct section *find_section_by_name(struct elf *elf, const char *name);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 79367087bd18..8f24865596af 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2289,6 +2289,12 @@ static int process_switch_event(struct perf_tool *tool,
 	if (perf_event__process_switch(tool, event, sample, machine) < 0)
 		return -1;
 
+	if (scripting_ops && scripting_ops->process_switch)
+		scripting_ops->process_switch(event, sample, machine);
+
+	if (!script->show_switch_events)
+		return 0;
+
 	thread = machine__findnew_thread(machine, sample->pid,
 					 sample->tid);
 	if (thread == NULL) {
@@ -2467,7 +2473,7 @@ static int __cmd_script(struct perf_script *script)
 		script->tool.mmap = process_mmap_event;
 		script->tool.mmap2 = process_mmap2_event;
 	}
-	if (script->show_switch_events)
+	if (script->show_switch_events || (scripting_ops && scripting_ops->process_switch))
 		script->tool.context_switch = process_switch_event;
 	if (script->show_namespace_events)
 		script->tool.namespaces = process_namespaces_event;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1aa2ed096f65..4f0bbffee05f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
 #include <api/fs/tracing_path.h>
 #include <bpf/bpf.h>
 #include "util/bpf_map.h"
+#include "util/rlimit.h"
 #include "builtin.h"
 #include "util/cgroup.h"
 #include "util/color.h"
@@ -3864,6 +3865,15 @@ int cmd_trace(int argc, const char **argv)
 		goto out;
 	}
 
+	/*
+	 * Parsing .perfconfig may entail creating a BPF event, that may need
+	 * to create BPF maps, so bump RLIMIT_MEMLOCK as the default 64K setting
+	 * is too small. This affects just this process, not touching the
+	 * global setting. If it fails we'll get something in 'perf trace -v'
+	 * to help diagnose the problem.
+	 */
+	rlimit__bump_memlock();
+
 	err = perf_config(trace__config, &trace);
 	if (err)
 		goto out;
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index f470144d1a70..bf114ca9ca87 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -19,6 +19,7 @@ static struct version version;
 static struct option version_options[] = {
 	OPT_BOOLEAN(0, "build-options", &version.build_options,
 		    "display the build options"),
+	OPT_END(),
 };
 
 static const char * const version_usage[] = {
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json
new file mode 100644
index 000000000000..17fb5241928b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json
@@ -0,0 +1,58 @@
+[
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "0",
+		"EventName": "CPU_CYCLES",
+		"BriefDescription": "CPU Cycles",
+		"PublicDescription": "Cycle Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "1",
+		"EventName": "INSTRUCTIONS",
+		"BriefDescription": "Instructions",
+		"PublicDescription": "Instruction Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "2",
+		"EventName": "L1I_DIR_WRITES",
+		"BriefDescription": "L1I Directory Writes",
+		"PublicDescription": "Level-1 I-Cache Directory Write Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "3",
+		"EventName": "L1I_PENALTY_CYCLES",
+		"BriefDescription": "L1I Penalty Cycles",
+		"PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "4",
+		"EventName": "L1D_DIR_WRITES",
+		"BriefDescription": "L1D Directory Writes",
+		"PublicDescription": "Level-1 D-Cache Directory Write Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "5",
+		"EventName": "L1D_PENALTY_CYCLES",
+		"BriefDescription": "L1D Penalty Cycles",
+		"PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "32",
+		"EventName": "PROBLEM_STATE_CPU_CYCLES",
+		"BriefDescription": "Problem-State CPU Cycles",
+		"PublicDescription": "Problem-State Cycle Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "33",
+		"EventName": "PROBLEM_STATE_INSTRUCTIONS",
+		"BriefDescription": "Problem-State Instructions",
+		"PublicDescription": "Problem-State Instruction Count"
+	},
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json
new file mode 100644
index 000000000000..db286f19e7b6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json
@@ -0,0 +1,114 @@
+[
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "64",
+		"EventName": "PRNG_FUNCTIONS",
+		"BriefDescription": "PRNG Functions",
+		"PublicDescription": "Total number of the PRNG functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "65",
+		"EventName": "PRNG_CYCLES",
+		"BriefDescription": "PRNG Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "66",
+		"EventName": "PRNG_BLOCKED_FUNCTIONS",
+		"BriefDescription": "PRNG Blocked Functions",
+		"PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "67",
+		"EventName": "PRNG_BLOCKED_CYCLES",
+		"BriefDescription": "PRNG Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "68",
+		"EventName": "SHA_FUNCTIONS",
+		"BriefDescription": "SHA Functions",
+		"PublicDescription": "Total number of SHA functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "69",
+		"EventName": "SHA_CYCLES",
+		"BriefDescription": "SHA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "70",
+		"EventName": "SHA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "SHA Blocked Functions",
+		"PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "71",
+		"EventName": "SHA_BLOCKED_CYCLES",
+		"BriefDescription": "SHA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "72",
+		"EventName": "DEA_FUNCTIONS",
+		"BriefDescription": "DEA Functions",
+		"PublicDescription": "Total number of the DEA functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "73",
+		"EventName": "DEA_CYCLES",
+		"BriefDescription": "DEA Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "74",
+		"EventName": "DEA_BLOCKED_FUNCTIONS",
+		"BriefDescription": "DEA Blocked Functions",
+		"PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "75",
+		"EventName": "DEA_BLOCKED_CYCLES",
+		"BriefDescription": "DEA Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "76",
+		"EventName": "AES_FUNCTIONS",
+		"BriefDescription": "AES Functions",
+		"PublicDescription": "Total number of AES functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "77",
+		"EventName": "AES_CYCLES",
+		"BriefDescription": "AES Cycles",
+		"PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "78",
+		"EventName": "AES_BLOCKED_FUNCTIONS",
+		"BriefDescription": "AES Blocked Functions",
+		"PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "79",
+		"EventName": "AES_BLOCKED_CYCLES",
+		"BriefDescription": "AES Blocked Cycles",
+		"PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+	},
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json
new file mode 100644
index 000000000000..5e36bc2468d0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json
@@ -0,0 +1,30 @@
+[
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "80",
+		"EventName": "ECC_FUNCTION_COUNT",
+		"BriefDescription": "ECC Function Count",
+		"PublicDescription": "Long ECC function Count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "81",
+		"EventName": "ECC_CYCLES_COUNT",
+		"BriefDescription": "ECC Cycles Count",
+		"PublicDescription": "Long ECC Function cycles count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "82",
+		"EventName": "ECC_BLOCKED_FUNCTION_COUNT",
+		"BriefDescription": "ECC Blocked Function Count",
+		"PublicDescription": "Long ECC blocked function count"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "83",
+		"EventName": "ECC_BLOCKED_CYCLES_COUNT",
+		"BriefDescription": "ECC Blocked Cycles Count",
+		"PublicDescription": "Long ECC blocked cycles count"
+	},
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json
new file mode 100644
index 000000000000..89e070727e1b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json
@@ -0,0 +1,373 @@
+[
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "128",
+		"EventName": "L1D_RO_EXCL_WRITES",
+		"BriefDescription": "L1D Read-only Exclusive Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "129",
+		"EventName": "DTLB2_WRITES",
+		"BriefDescription": "DTLB2 Writes",
+		"PublicDescription": "A translation has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "130",
+		"EventName": "DTLB2_MISSES",
+		"BriefDescription": "DTLB2 Misses",
+		"PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "131",
+		"EventName": "DTLB2_HPAGE_WRITES",
+		"BriefDescription": "DTLB2 One-Megabyte Page Writes",
+		"PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "132",
+		"EventName": "DTLB2_GPAGE_WRITES",
+		"BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
+		"PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "133",
+		"EventName": "L1D_L2D_SOURCED_WRITES",
+		"BriefDescription": "L1D L2D Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "134",
+		"EventName": "ITLB2_WRITES",
+		"BriefDescription": "ITLB2 Writes",
+		"PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "135",
+		"EventName": "ITLB2_MISSES",
+		"BriefDescription": "ITLB2 Misses",
+		"PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "136",
+		"EventName": "L1I_L2I_SOURCED_WRITES",
+		"BriefDescription": "L1I L2I Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "137",
+		"EventName": "TLB2_PTE_WRITES",
+		"BriefDescription": "TLB2 PTE Writes",
+		"PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "138",
+		"EventName": "TLB2_CRSTE_WRITES",
+		"BriefDescription": "TLB2 CRSTE Writes",
+		"PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "139",
+		"EventName": "TLB2_ENGINES_BUSY",
+		"BriefDescription": "TLB2 Engines Busy",
+		"PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "140",
+		"EventName": "TX_C_TEND",
+		"BriefDescription": "Completed TEND instructions in constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "141",
+		"EventName": "TX_NC_TEND",
+		"BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+		"PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "143",
+		"EventName": "L1C_TLB2_MISSES",
+		"BriefDescription": "L1C TLB2 Misses",
+		"PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "144",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "145",
+		"EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Chip Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "146",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "147",
+		"EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "148",
+		"EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "149",
+		"EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "150",
+		"EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "151",
+		"EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "152",
+		"EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "153",
+		"EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "154",
+		"EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "155",
+		"EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "156",
+		"EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "157",
+		"EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "158",
+		"EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
+		"BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
+		"PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "162",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "163",
+		"EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Chip Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "164",
+		"EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "165",
+		"EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "166",
+		"EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "167",
+		"EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "168",
+		"EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "169",
+		"EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "170",
+		"EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "171",
+		"EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "172",
+		"EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "173",
+		"EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
+		"BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "174",
+		"EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "175",
+		"EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
+		"BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
+		"PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "224",
+		"EventName": "BCD_DFP_EXECUTION_SLOTS",
+		"BriefDescription": "BCD DFP Execution Slots",
+		"PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "225",
+		"EventName": "VX_BCD_EXECUTION_SLOTS",
+		"BriefDescription": "VX BCD Execution Slots",
+		"PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMP, VMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOP, VCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVD, VCVDG"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "226",
+		"EventName": "DECIMAL_INSTRUCTIONS",
+		"BriefDescription": "Decimal Instructions",
+		"PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "232",
+		"EventName": "LAST_HOST_TRANSLATIONS",
+		"BriefDescription": "Last host translation done",
+		"PublicDescription": "Last Host Translation done"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "243",
+		"EventName": "TX_NC_TABORT",
+		"BriefDescription": "Aborted transactions in non-constrained TX mode",
+		"PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "244",
+		"EventName": "TX_C_TABORT_NO_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "245",
+		"EventName": "TX_C_TABORT_SPECIAL",
+		"BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+		"PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "448",
+		"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+		"BriefDescription": "Cycle count with one thread active",
+		"PublicDescription": "Cycle count with one thread active"
+	},
+	{
+		"Unit": "CPU-M-CF",
+		"EventCode": "449",
+		"EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+		"BriefDescription": "Cycle count with two threads active",
+		"PublicDescription": "Cycle count with two threads active"
+	},
+]
diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv
index 78bcf7f8e206..bd3fc577139c 100644
--- a/tools/perf/pmu-events/arch/s390/mapfile.csv
+++ b/tools/perf/pmu-events/arch/s390/mapfile.csv
@@ -4,3 +4,4 @@ Family-model,Version,Filename,EventType
 ^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
 ^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
 ^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
+^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 92713d93e956..7bd73a904b4e 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -353,7 +353,10 @@ do_query(query, 'CREATE TABLE threads ('
 		'tid		integer)')
 do_query(query, 'CREATE TABLE comms ('
 		'id		bigint		NOT NULL,'
-		'comm		varchar(16))')
+		'comm		varchar(16),'
+		'c_thread_id	bigint,'
+		'c_time		bigint,'
+		'exec_flag	boolean)')
 do_query(query, 'CREATE TABLE comm_threads ('
 		'id		bigint		NOT NULL,'
 		'comm_id	bigint,'
@@ -479,6 +482,17 @@ do_query(query, 'CREATE TABLE pwrx ('
 	'last_cstate	integer,'
 	'wake_reason	integer)')
 
+do_query(query, 'CREATE TABLE context_switches ('
+		'id		bigint		NOT NULL,'
+		'machine_id	bigint,'
+		'time		bigint,'
+		'cpu		integer,'
+		'thread_out_id	bigint,'
+		'comm_out_id	bigint,'
+		'thread_in_id	bigint,'
+		'comm_in_id	bigint,'
+		'flags		integer)')
+
 do_query(query, 'CREATE VIEW machines_view AS '
 	'SELECT '
 		'id,'
@@ -692,6 +706,29 @@ do_query(query, 'CREATE VIEW power_events_view AS '
 	' INNER JOIN selected_events ON selected_events.id = samples.evsel_id'
 	' ORDER BY samples.id')
 
+do_query(query, 'CREATE VIEW context_switches_view AS '
+	'SELECT '
+		'context_switches.id,'
+		'context_switches.machine_id,'
+		'context_switches.time,'
+		'context_switches.cpu,'
+		'th_out.pid AS pid_out,'
+		'th_out.tid AS tid_out,'
+		'comm_out.comm AS comm_out,'
+		'th_in.pid AS pid_in,'
+		'th_in.tid AS tid_in,'
+		'comm_in.comm AS comm_in,'
+		'CASE	  WHEN context_switches.flags = 0 THEN \'in\''
+			' WHEN context_switches.flags = 1 THEN \'out\''
+			' WHEN context_switches.flags = 3 THEN \'out preempt\''
+			' ELSE CAST ( context_switches.flags AS VARCHAR(11) )'
+		'END AS flags'
+	' FROM context_switches'
+	' INNER JOIN threads AS th_out ON th_out.id   = context_switches.thread_out_id'
+	' INNER JOIN threads AS th_in  ON th_in.id    = context_switches.thread_in_id'
+	' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id'
+	' INNER JOIN comms AS comm_in  ON comm_in.id  = context_switches.comm_in_id')
+
 file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0)
 file_trailer = b"\377\377"
 
@@ -756,6 +793,7 @@ mwait_file		= open_output_file("mwait_table.bin")
 pwre_file		= open_output_file("pwre_table.bin")
 exstop_file		= open_output_file("exstop_table.bin")
 pwrx_file		= open_output_file("pwrx_table.bin")
+context_switches_file	= open_output_file("context_switches_table.bin")
 
 def trace_begin():
 	printdate("Writing to intermediate files...")
@@ -763,7 +801,7 @@ def trace_begin():
 	evsel_table(0, "unknown")
 	machine_table(0, 0, "unknown")
 	thread_table(0, 0, 0, -1, -1)
-	comm_table(0, "unknown")
+	comm_table(0, "unknown", 0, 0, 0)
 	dso_table(0, 0, "unknown", "unknown", "")
 	symbol_table(0, 0, 0, 0, 0, "unknown")
 	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
@@ -804,6 +842,7 @@ def trace_end():
 	copy_output_file(pwre_file,		"pwre")
 	copy_output_file(exstop_file,		"exstop")
 	copy_output_file(pwrx_file,		"pwrx")
+	copy_output_file(context_switches_file,	"context_switches")
 
 	printdate("Removing intermediate files...")
 	remove_output_file(evsel_file)
@@ -825,6 +864,7 @@ def trace_end():
 	remove_output_file(pwre_file)
 	remove_output_file(exstop_file)
 	remove_output_file(pwrx_file)
+	remove_output_file(context_switches_file)
 	os.rmdir(output_dir_name)
 	printdate("Adding primary keys")
 	do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
@@ -846,11 +886,14 @@ def trace_end():
 	do_query(query, 'ALTER TABLE pwre            ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE exstop          ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE pwrx            ADD PRIMARY KEY (id)')
+	do_query(query, 'ALTER TABLE context_switches ADD PRIMARY KEY (id)')
 
 	printdate("Adding foreign keys")
 	do_query(query, 'ALTER TABLE threads '
 					'ADD CONSTRAINT machinefk  FOREIGN KEY (machine_id)   REFERENCES machines   (id),'
 					'ADD CONSTRAINT processfk  FOREIGN KEY (process_id)   REFERENCES threads    (id)')
+	do_query(query, 'ALTER TABLE comms '
+					'ADD CONSTRAINT threadfk   FOREIGN KEY (c_thread_id)  REFERENCES threads    (id)')
 	do_query(query, 'ALTER TABLE comm_threads '
 					'ADD CONSTRAINT commfk     FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
 					'ADD CONSTRAINT threadfk   FOREIGN KEY (thread_id)    REFERENCES threads    (id)')
@@ -881,6 +924,8 @@ def trace_end():
 					'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
 		do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
 		do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
+		do_query(query, 'ALTER TABLE comms ADD has_calls boolean')
+		do_query(query, 'UPDATE comms SET has_calls = TRUE WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)')
 	do_query(query, 'ALTER TABLE ptwrite '
 					'ADD CONSTRAINT idfk        FOREIGN KEY (id)           REFERENCES samples   (id)')
 	do_query(query, 'ALTER TABLE  cbr '
@@ -893,6 +938,12 @@ def trace_end():
 					'ADD CONSTRAINT idfk        FOREIGN KEY (id)           REFERENCES samples   (id)')
 	do_query(query, 'ALTER TABLE  pwrx '
 					'ADD CONSTRAINT idfk        FOREIGN KEY (id)           REFERENCES samples   (id)')
+	do_query(query, 'ALTER TABLE  context_switches '
+					'ADD CONSTRAINT machinefk   FOREIGN KEY (machine_id)    REFERENCES machines (id),'
+					'ADD CONSTRAINT toutfk      FOREIGN KEY (thread_out_id) REFERENCES threads  (id),'
+					'ADD CONSTRAINT tinfk       FOREIGN KEY (thread_in_id)  REFERENCES threads  (id),'
+					'ADD CONSTRAINT coutfk      FOREIGN KEY (comm_out_id)   REFERENCES comms    (id),'
+					'ADD CONSTRAINT cinfk       FOREIGN KEY (comm_in_id)    REFERENCES comms    (id)')
 
 	printdate("Dropping unused tables")
 	if is_table_empty("ptwrite"):
@@ -905,6 +956,8 @@ def trace_end():
 		drop("pwrx")
 		if is_table_empty("cbr"):
 			drop("cbr")
+	if is_table_empty("context_switches"):
+		drop("context_switches")
 
 	if (unhandled_count):
 		printdate("Warning: ", unhandled_count, " unhandled events")
@@ -935,11 +988,11 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
 	value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
 	thread_file.write(value)
 
-def comm_table(comm_id, comm_str, *x):
+def comm_table(comm_id, comm_str, thread_id, time, exec_flag, *x):
 	comm_str = toserverstr(comm_str)
 	n = len(comm_str)
-	fmt = "!hiqi" + str(n) + "s"
-	value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
+	fmt = "!hiqi" + str(n) + "s" + "iqiqiB"
+	value = struct.pack(fmt, 5, 8, comm_id, n, comm_str, 8, thread_id, 8, time, 1, exec_flag)
 	comm_file.write(value)
 
 def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
@@ -1051,3 +1104,8 @@ def synth_data(id, config, raw_buf, *x):
 		pwrx(id, raw_buf)
 	elif config == 5:
 		cbr(id, raw_buf)
+
+def context_switch_table(id, machine_id, time, cpu, thread_out_id, comm_out_id, thread_in_id, comm_in_id, flags, *x):
+	fmt = "!hiqiqiqiiiqiqiqiqii"
+	value = struct.pack(fmt, 9, 8, id, 8, machine_id, 8, time, 4, cpu, 8, thread_out_id, 8, comm_out_id, 8, thread_in_id, 8, comm_in_id, 4, flags)
+	context_switches_file.write(value)
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index 021326c46285..8043a7272a56 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -177,7 +177,10 @@ do_query(query, 'CREATE TABLE threads ('
 		'tid		integer)')
 do_query(query, 'CREATE TABLE comms ('
 		'id		integer		NOT NULL	PRIMARY KEY,'
-		'comm		varchar(16))')
+		'comm		varchar(16),'
+		'c_thread_id	bigint,'
+		'c_time		bigint,'
+		'exec_flag	boolean)')
 do_query(query, 'CREATE TABLE comm_threads ('
 		'id		integer		NOT NULL	PRIMARY KEY,'
 		'comm_id	bigint,'
@@ -303,6 +306,17 @@ do_query(query, 'CREATE TABLE pwrx ('
 		'last_cstate	integer,'
 		'wake_reason	integer)')
 
+do_query(query, 'CREATE TABLE context_switches ('
+		'id		integer		NOT NULL	PRIMARY KEY,'
+		'machine_id	bigint,'
+		'time		bigint,'
+		'cpu		integer,'
+		'thread_out_id	bigint,'
+		'comm_out_id	bigint,'
+		'thread_in_id	bigint,'
+		'comm_in_id	bigint,'
+		'flags		integer)')
+
 # printf was added to sqlite in version 3.8.3
 sqlite_has_printf = False
 try:
@@ -527,6 +541,29 @@ do_query(query, 'CREATE VIEW power_events_view AS '
 	' INNER JOIN selected_events ON selected_events.id = evsel_id'
 	' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')')
 
+do_query(query, 'CREATE VIEW context_switches_view AS '
+	'SELECT '
+		'context_switches.id,'
+		'context_switches.machine_id,'
+		'context_switches.time,'
+		'context_switches.cpu,'
+		'th_out.pid AS pid_out,'
+		'th_out.tid AS tid_out,'
+		'comm_out.comm AS comm_out,'
+		'th_in.pid AS pid_in,'
+		'th_in.tid AS tid_in,'
+		'comm_in.comm AS comm_in,'
+		'CASE	  WHEN context_switches.flags = 0 THEN \'in\''
+			' WHEN context_switches.flags = 1 THEN \'out\''
+			' WHEN context_switches.flags = 3 THEN \'out preempt\''
+			' ELSE context_switches.flags '
+		'END AS flags'
+	' FROM context_switches'
+	' INNER JOIN threads AS th_out ON th_out.id   = context_switches.thread_out_id'
+	' INNER JOIN threads AS th_in  ON th_in.id    = context_switches.thread_in_id'
+	' INNER JOIN comms AS comm_out ON comm_out.id = context_switches.comm_out_id'
+	' INNER JOIN comms AS comm_in  ON comm_in.id  = context_switches.comm_in_id')
+
 do_query(query, 'END TRANSACTION')
 
 evsel_query = QSqlQuery(db)
@@ -536,7 +573,7 @@ machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)")
 thread_query = QSqlQuery(db)
 thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)")
 comm_query = QSqlQuery(db)
-comm_query.prepare("INSERT INTO comms VALUES (?, ?)")
+comm_query.prepare("INSERT INTO comms VALUES (?, ?, ?, ?, ?)")
 comm_thread_query = QSqlQuery(db)
 comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)")
 dso_query = QSqlQuery(db)
@@ -568,6 +605,8 @@ exstop_query = QSqlQuery(db)
 exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)")
 pwrx_query = QSqlQuery(db)
 pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)")
+context_switch_query = QSqlQuery(db)
+context_switch_query.prepare("INSERT INTO context_switches VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)")
 
 def trace_begin():
 	printdate("Writing records...")
@@ -576,7 +615,7 @@ def trace_begin():
 	evsel_table(0, "unknown")
 	machine_table(0, 0, "unknown")
 	thread_table(0, 0, 0, -1, -1)
-	comm_table(0, "unknown")
+	comm_table(0, "unknown", 0, 0, 0)
 	dso_table(0, 0, "unknown", "unknown", "")
 	symbol_table(0, 0, 0, 0, 0, "unknown")
 	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
@@ -603,6 +642,8 @@ def trace_end():
 	if perf_db_export_calls:
 		do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
 		do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
+		do_query(query, 'ALTER TABLE comms ADD has_calls boolean')
+		do_query(query, 'UPDATE comms SET has_calls = 1 WHERE comms.id IN (SELECT DISTINCT comm_id FROM calls)')
 
 	printdate("Dropping unused tables")
 	if is_table_empty("ptwrite"):
@@ -615,6 +656,8 @@ def trace_end():
 		drop("pwrx")
 		if is_table_empty("cbr"):
 			drop("cbr")
+	if is_table_empty("context_switches"):
+		drop("context_switches")
 
 	if (unhandled_count):
 		printdate("Warning: ", unhandled_count, " unhandled events")
@@ -642,7 +685,7 @@ def thread_table(*x):
 	bind_exec(thread_query, 5, x)
 
 def comm_table(*x):
-	bind_exec(comm_query, 2, x)
+	bind_exec(comm_query, 5, x)
 
 def comm_thread_table(*x):
 	bind_exec(comm_thread_query, 3, x)
@@ -748,3 +791,6 @@ def synth_data(id, config, raw_buf, *x):
 		pwrx(id, raw_buf)
 	elif config == 5:
 		cbr(id, raw_buf)
+
+def context_switch_table(*x):
+	bind_exec(context_switch_query, 9, x)
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 6e7934f2ac9a..61b3911d91e6 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -392,7 +392,7 @@ class FindBar():
 		self.hbox.addWidget(self.close_button)
 
 		self.bar = QWidget()
-		self.bar.setLayout(self.hbox);
+		self.bar.setLayout(self.hbox)
 		self.bar.hide()
 
 	def Widget(self):
@@ -470,7 +470,7 @@ class CallGraphLevelItemBase(object):
 		self.params = params
 		self.row = row
 		self.parent_item = parent_item
-		self.query_done = False;
+		self.query_done = False
 		self.child_count = 0
 		self.child_items = []
 		if parent_item:
@@ -517,7 +517,7 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
 		self.time = time
 
 	def Select(self):
-		self.query_done = True;
+		self.query_done = True
 		query = QSqlQuery(self.glb.db)
 		if self.params.have_ipc:
 			ipc_str = ", SUM(insn_count), SUM(cyc_count)"
@@ -604,7 +604,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase):
 		self.dbid = comm_id
 
 	def Select(self):
-		self.query_done = True;
+		self.query_done = True
 		query = QSqlQuery(self.glb.db)
 		QueryExec(query, "SELECT thread_id, pid, tid"
 					" FROM comm_threads"
@@ -622,9 +622,12 @@ class CallGraphRootItem(CallGraphLevelItemBase):
 	def __init__(self, glb, params):
 		super(CallGraphRootItem, self).__init__(glb, params, 0, None)
 		self.dbid = 0
-		self.query_done = True;
+		self.query_done = True
+		if_has_calls = ""
+		if IsSelectable(glb.db, "comms", columns = "has_calls"):
+			if_has_calls = " WHERE has_calls = TRUE"
 		query = QSqlQuery(glb.db)
-		QueryExec(query, "SELECT id, comm FROM comms")
+		QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls)
 		while query.next():
 			if not query.value(0):
 				continue
@@ -793,7 +796,7 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
 		self.time = time
 
 	def Select(self):
-		self.query_done = True;
+		self.query_done = True
 		if self.calls_id == 0:
 			comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id)
 		else:
@@ -881,7 +884,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase):
 		self.dbid = comm_id
 
 	def Select(self):
-		self.query_done = True;
+		self.query_done = True
 		query = QSqlQuery(self.glb.db)
 		QueryExec(query, "SELECT thread_id, pid, tid"
 					" FROM comm_threads"
@@ -899,9 +902,12 @@ class CallTreeRootItem(CallGraphLevelItemBase):
 	def __init__(self, glb, params):
 		super(CallTreeRootItem, self).__init__(glb, params, 0, None)
 		self.dbid = 0
-		self.query_done = True;
+		self.query_done = True
+		if_has_calls = ""
+		if IsSelectable(glb.db, "comms", columns = "has_calls"):
+			if_has_calls = " WHERE has_calls = TRUE"
 		query = QSqlQuery(glb.db)
-		QueryExec(query, "SELECT id, comm FROM comms")
+		QueryExec(query, "SELECT id, comm FROM comms" + if_has_calls)
 		while query.next():
 			if not query.value(0):
 				continue
@@ -971,7 +977,7 @@ class VBox():
 
 	def __init__(self, w1, w2, w3=None):
 		self.vbox = QWidget()
-		self.vbox.setLayout(QVBoxLayout());
+		self.vbox.setLayout(QVBoxLayout())
 
 		self.vbox.layout().setContentsMargins(0, 0, 0, 0)
 
@@ -1391,7 +1397,7 @@ class FetchMoreRecordsBar():
 		self.hbox.addWidget(self.close_button)
 
 		self.bar = QWidget()
-		self.bar.setLayout(self.hbox);
+		self.bar.setLayout(self.hbox)
 		self.bar.show()
 
 		self.in_progress = False
@@ -2206,7 +2212,7 @@ class ReportDialogBase(QDialog):
 		self.vbox.addLayout(self.grid)
 		self.vbox.addLayout(self.hbox)
 
-		self.setLayout(self.vbox);
+		self.setLayout(self.vbox)
 
 	def Ok(self):
 		vars = self.report_vars
@@ -3139,7 +3145,7 @@ class AboutDialog(QDialog):
 		self.vbox = QVBoxLayout()
 		self.vbox.addWidget(self.text)
 
-		self.setLayout(self.vbox);
+		self.setLayout(self.vbox)
 
 # Font resize
 
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 66a82badc1d1..c3bec9d2c201 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -21,6 +21,7 @@
 #include <subcmd/parse-options.h>
 #include "string2.h"
 #include "symbol.h"
+#include "util/rlimit.h"
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <subcmd/exec-cmd.h>
@@ -727,6 +728,11 @@ int cmd_test(int argc, const char **argv)
 
 	if (skip != NULL)
 		skiplist = intlist__new(skip);
+	/*
+	 * Tests that create BPF maps, for instance, need more than the 64K
+	 * default:
+	 */
+	rlimit__bump_memlock();
 
 	return __cmd_test(argc, argv, skiplist);
 }
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index d7e3b008a613..14f812bb07a7 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -20,6 +20,7 @@ perf-y += parse-events.o
 perf-y += perf_regs.o
 perf-y += path.o
 perf-y += print_binary.o
+perf-y += rlimit.o
 perf-y += argv_split.o
 perf-y += rbtree.o
 perf-y += libstring.o
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 67b88b599a53..3d1c34fc4d68 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2460,7 +2460,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
 		/* Something went wrong, no need to continue */
 		if (!inode) {
-			err = PTR_ERR(inode);
+			err = -ENOMEM;
 			goto err_free_metadata;
 		}
 
@@ -2517,8 +2517,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	session->auxtrace = &etm->auxtrace;
 
 	etm->unknown_thread = thread__new(999999999, 999999999);
-	if (!etm->unknown_thread)
+	if (!etm->unknown_thread) {
+		err = -ENOMEM;
 		goto err_free_queues;
+	}
 
 	/*
 	 * Initialize list node so that at thread__zput() we can avoid
@@ -2530,8 +2532,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	if (err)
 		goto err_delete_thread;
 
-	if (thread__init_map_groups(etm->unknown_thread, etm->machine))
+	if (thread__init_map_groups(etm->unknown_thread, etm->machine)) {
+		err = -ENOMEM;
 		goto err_delete_thread;
+	}
 
 	if (dump_trace) {
 		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
@@ -2575,5 +2579,5 @@ err_free_traceid_list:
 err_free_hdr:
 	zfree(&hdr);
 
-	return -EINVAL;
+	return err;
 }
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 2394c7506abe..ffbb3e7d3288 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -20,70 +20,14 @@
 #include "db-export.h"
 #include <linux/zalloc.h>
 
-struct deferred_export {
-	struct list_head node;
-	struct comm *comm;
-};
-
-static int db_export__deferred(struct db_export *dbe)
-{
-	struct deferred_export *de;
-	int err;
-
-	while (!list_empty(&dbe->deferred)) {
-		de = list_entry(dbe->deferred.next, struct deferred_export,
-				node);
-		err = dbe->export_comm(dbe, de->comm);
-		list_del_init(&de->node);
-		free(de);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static void db_export__free_deferred(struct db_export *dbe)
-{
-	struct deferred_export *de;
-
-	while (!list_empty(&dbe->deferred)) {
-		de = list_entry(dbe->deferred.next, struct deferred_export,
-				node);
-		list_del_init(&de->node);
-		free(de);
-	}
-}
-
-static int db_export__defer_comm(struct db_export *dbe, struct comm *comm)
-{
-	struct deferred_export *de;
-
-	de = zalloc(sizeof(struct deferred_export));
-	if (!de)
-		return -ENOMEM;
-
-	de->comm = comm;
-	list_add_tail(&de->node, &dbe->deferred);
-
-	return 0;
-}
-
 int db_export__init(struct db_export *dbe)
 {
 	memset(dbe, 0, sizeof(struct db_export));
-	INIT_LIST_HEAD(&dbe->deferred);
 	return 0;
 }
 
-int db_export__flush(struct db_export *dbe)
-{
-	return db_export__deferred(dbe);
-}
-
 void db_export__exit(struct db_export *dbe)
 {
-	db_export__free_deferred(dbe);
 	call_return_processor__free(dbe->crp);
 	dbe->crp = NULL;
 }
@@ -115,71 +59,73 @@ int db_export__machine(struct db_export *dbe, struct machine *machine)
 }
 
 int db_export__thread(struct db_export *dbe, struct thread *thread,
-		      struct machine *machine, struct comm *comm)
+		      struct machine *machine, struct thread *main_thread)
 {
-	struct thread *main_thread;
 	u64 main_thread_db_id = 0;
-	int err;
 
 	if (thread->db_id)
 		return 0;
 
 	thread->db_id = ++dbe->thread_last_db_id;
 
-	if (thread->pid_ != -1) {
-		if (thread->pid_ == thread->tid) {
-			main_thread = thread;
-		} else {
-			main_thread = machine__findnew_thread(machine,
-							      thread->pid_,
-							      thread->pid_);
-			if (!main_thread)
-				return -ENOMEM;
-			err = db_export__thread(dbe, main_thread, machine,
-						comm);
-			if (err)
-				goto out_put;
-			if (comm) {
-				err = db_export__comm_thread(dbe, comm, thread);
-				if (err)
-					goto out_put;
-			}
-		}
+	if (main_thread)
 		main_thread_db_id = main_thread->db_id;
-		if (main_thread != thread)
-			thread__put(main_thread);
-	}
 
 	if (dbe->export_thread)
 		return dbe->export_thread(dbe, thread, main_thread_db_id,
 					  machine);
 
 	return 0;
+}
 
-out_put:
-	thread__put(main_thread);
-	return err;
+static int __db_export__comm(struct db_export *dbe, struct comm *comm,
+			     struct thread *thread)
+{
+	comm->db_id = ++dbe->comm_last_db_id;
+
+	if (dbe->export_comm)
+		return dbe->export_comm(dbe, comm, thread);
+
+	return 0;
 }
 
 int db_export__comm(struct db_export *dbe, struct comm *comm,
-		    struct thread *main_thread)
+		    struct thread *thread)
+{
+	if (comm->db_id)
+		return 0;
+
+	return __db_export__comm(dbe, comm, thread);
+}
+
+/*
+ * Export the "exec" comm. The "exec" comm is the program / application command
+ * name at the time it first executes. It is used to group threads for the same
+ * program. Note that the main thread pid (or thread group id tgid) cannot be
+ * used because it does not change when a new program is exec'ed.
+ */
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+			 struct thread *main_thread)
 {
 	int err;
 
 	if (comm->db_id)
 		return 0;
 
-	comm->db_id = ++dbe->comm_last_db_id;
-
-	if (dbe->export_comm) {
-		if (main_thread->comm_set)
-			err = dbe->export_comm(dbe, comm);
-		else
-			err = db_export__defer_comm(dbe, comm);
-		if (err)
-			return err;
-	}
+	err = __db_export__comm(dbe, comm, main_thread);
+	if (err)
+		return err;
 
+	/*
+	 * Record the main thread for this comm. Note that the main thread can
+	 * have many "exec" comms because there will be a new one every time it
+	 * exec's. An "exec" comm however will only ever have 1 main thread.
+	 * That is different to any other threads for that same program because
+	 * exec() will effectively kill them, so the relationship between the
+	 * "exec" comm and non-main threads is 1-to-1. That is why
+	 * db_export__comm_thread() is called here for the main thread, but it
+	 * is called for non-main threads when they are exported.
+	 */
 	return db_export__comm_thread(dbe, comm, main_thread);
 }
 
@@ -340,11 +286,65 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type,
 	return 0;
 }
 
+static int db_export__threads(struct db_export *dbe, struct thread *thread,
+			      struct thread *main_thread,
+			      struct machine *machine, struct comm **comm_ptr)
+{
+	struct comm *comm = NULL;
+	struct comm *curr_comm;
+	int err;
+
+	if (main_thread) {
+		/*
+		 * A thread has a reference to the main thread, so export the
+		 * main thread first.
+		 */
+		err = db_export__thread(dbe, main_thread, machine, main_thread);
+		if (err)
+			return err;
+		/*
+		 * Export comm before exporting the non-main thread because
+		 * db_export__comm_thread() can be called further below.
+		 */
+		comm = machine__thread_exec_comm(machine, main_thread);
+		if (comm) {
+			err = db_export__exec_comm(dbe, comm, main_thread);
+			if (err)
+				return err;
+			*comm_ptr = comm;
+		}
+	}
+
+	if (thread != main_thread) {
+		/*
+		 * For a non-main thread, db_export__comm_thread() must be
+		 * called only if thread has not previously been exported.
+		 */
+		bool export_comm_thread = comm && !thread->db_id;
+
+		err = db_export__thread(dbe, thread, machine, main_thread);
+		if (err)
+			return err;
+
+		if (export_comm_thread) {
+			err = db_export__comm_thread(dbe, comm, thread);
+			if (err)
+				return err;
+		}
+	}
+
+	curr_comm = thread__comm(thread);
+	if (curr_comm)
+		return db_export__comm(dbe, curr_comm, thread);
+
+	return 0;
+}
+
 int db_export__sample(struct db_export *dbe, union perf_event *event,
 		      struct perf_sample *sample, struct perf_evsel *evsel,
 		      struct addr_location *al)
 {
-	struct thread* thread = al->thread;
+	struct thread *thread = al->thread;
 	struct export_sample es = {
 		.event = event,
 		.sample = sample,
@@ -364,19 +364,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 		return err;
 
 	main_thread = thread__main_thread(al->machine, thread);
-	if (main_thread)
-		comm = machine__thread_exec_comm(al->machine, main_thread);
 
-	err = db_export__thread(dbe, thread, al->machine, comm);
+	err = db_export__threads(dbe, thread, main_thread, al->machine, &comm);
 	if (err)
 		goto out_put;
 
-	if (comm) {
-		err = db_export__comm(dbe, comm, main_thread);
-		if (err)
-			goto out_put;
+	if (comm)
 		es.comm_db_id = comm->db_id;
-	}
 
 	es.db_id = ++dbe->sample_last_db_id;
 
@@ -525,3 +519,92 @@ int db_export__call_return(struct db_export *dbe, struct call_return *cr,
 
 	return 0;
 }
+
+static int db_export__pid_tid(struct db_export *dbe, struct machine *machine,
+			      pid_t pid, pid_t tid, u64 *db_id,
+			      struct comm **comm_ptr, bool *is_idle)
+{
+	struct thread *thread = machine__find_thread(machine, pid, tid);
+	struct thread *main_thread;
+	int err = 0;
+
+	if (!thread || !thread->comm_set)
+		goto out_put;
+
+	*is_idle = !thread->pid_ && !thread->tid;
+
+	main_thread = thread__main_thread(machine, thread);
+
+	err = db_export__threads(dbe, thread, main_thread, machine, comm_ptr);
+
+	*db_id = thread->db_id;
+
+	thread__put(main_thread);
+out_put:
+	thread__put(thread);
+
+	return err;
+}
+
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct machine *machine)
+{
+	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+	bool out_preempt = out &&
+		(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT);
+	int flags = out | (out_preempt << 1);
+	bool is_idle_a = false, is_idle_b = false;
+	u64 th_a_id = 0, th_b_id = 0;
+	u64 comm_out_id, comm_in_id;
+	struct comm *comm_a = NULL;
+	struct comm *comm_b = NULL;
+	u64 th_out_id, th_in_id;
+	u64 db_id;
+	int err;
+
+	err = db_export__machine(dbe, machine);
+	if (err)
+		return err;
+
+	err = db_export__pid_tid(dbe, machine, sample->pid, sample->tid,
+				 &th_a_id, &comm_a, &is_idle_a);
+	if (err)
+		return err;
+
+	if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+		pid_t pid = event->context_switch.next_prev_pid;
+		pid_t tid = event->context_switch.next_prev_tid;
+
+		err = db_export__pid_tid(dbe, machine, pid, tid, &th_b_id,
+					 &comm_b, &is_idle_b);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Do not export if both threads are unknown (i.e. not being traced),
+	 * or one is unknown and the other is the idle task.
+	 */
+	if ((!th_a_id || is_idle_a) && (!th_b_id || is_idle_b))
+		return 0;
+
+	db_id = ++dbe->context_switch_last_db_id;
+
+	if (out) {
+		th_out_id   = th_a_id;
+		th_in_id    = th_b_id;
+		comm_out_id = comm_a ? comm_a->db_id : 0;
+		comm_in_id  = comm_b ? comm_b->db_id : 0;
+	} else {
+		th_out_id   = th_b_id;
+		th_in_id    = th_a_id;
+		comm_out_id = comm_b ? comm_b->db_id : 0;
+		comm_in_id  = comm_a ? comm_a->db_id : 0;
+	}
+
+	if (dbe->export_context_switch)
+		return dbe->export_context_switch(dbe, db_id, machine, sample,
+						  th_out_id, comm_out_id,
+						  th_in_id, comm_in_id, flags);
+	return 0;
+}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index e8a64028a386..ba1f62a5fe10 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -43,7 +43,8 @@ struct db_export {
 	int (*export_machine)(struct db_export *dbe, struct machine *machine);
 	int (*export_thread)(struct db_export *dbe, struct thread *thread,
 			     u64 main_thread_db_id, struct machine *machine);
-	int (*export_comm)(struct db_export *dbe, struct comm *comm);
+	int (*export_comm)(struct db_export *dbe, struct comm *comm,
+			   struct thread *thread);
 	int (*export_comm_thread)(struct db_export *dbe, u64 db_id,
 				  struct comm *comm, struct thread *thread);
 	int (*export_dso)(struct db_export *dbe, struct dso *dso,
@@ -56,6 +57,11 @@ struct db_export {
 	int (*export_call_path)(struct db_export *dbe, struct call_path *cp);
 	int (*export_call_return)(struct db_export *dbe,
 				  struct call_return *cr);
+	int (*export_context_switch)(struct db_export *dbe, u64 db_id,
+				     struct machine *machine,
+				     struct perf_sample *sample,
+				     u64 th_out_id, u64 comm_out_id,
+				     u64 th_in_id, u64 comm_in_id, int flags);
 	struct call_return_processor *crp;
 	struct call_path_root *cpr;
 	u64 evsel_last_db_id;
@@ -68,18 +74,19 @@ struct db_export {
 	u64 sample_last_db_id;
 	u64 call_path_last_db_id;
 	u64 call_return_last_db_id;
-	struct list_head deferred;
+	u64 context_switch_last_db_id;
 };
 
 int db_export__init(struct db_export *dbe);
-int db_export__flush(struct db_export *dbe);
 void db_export__exit(struct db_export *dbe);
 int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel);
 int db_export__machine(struct db_export *dbe, struct machine *machine);
 int db_export__thread(struct db_export *dbe, struct thread *thread,
-		      struct machine *machine, struct comm *comm);
+		      struct machine *machine, struct thread *main_thread);
 int db_export__comm(struct db_export *dbe, struct comm *comm,
-		    struct thread *main_thread);
+		    struct thread *thread);
+int db_export__exec_comm(struct db_export *dbe, struct comm *comm,
+			 struct thread *main_thread);
 int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
 			   struct thread *thread);
 int db_export__dso(struct db_export *dbe, struct dso *dso,
@@ -97,5 +104,7 @@ int db_export__branch_types(struct db_export *dbe);
 int db_export__call_path(struct db_export *dbe, struct call_path *cp);
 int db_export__call_return(struct db_export *dbe, struct call_return *cr,
 			   u64 *parent_db_id);
+int db_export__switch(struct db_export *dbe, union perf_event *event,
+		      struct perf_sample *sample, struct machine *machine);
 
 #endif
diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c
new file mode 100644
index 000000000000..13521d392a22
--- /dev/null
+++ b/tools/perf/util/rlimit.c
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+#include "util/debug.h"
+#include "util/rlimit.h"
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/*
+ * Bump the memlock limit so that we can get BPF maps of a reasonable size,
+ * like the ones used with 'perf trace' and with 'perf test bpf'.
+ * Improve this to handle some specific request if needed.
+ */
+void rlimit__bump_memlock(void)
+{
+	struct rlimit rlim;
+
+	if (getrlimit(RLIMIT_MEMLOCK, &rlim) == 0) {
+		rlim.rlim_cur *= 4;
+		rlim.rlim_max *= 4;
+
+		if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
+			rlim.rlim_cur /= 2;
+			rlim.rlim_max /= 2;
+
+			if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0)
+				pr_debug("Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc\n");
+		}
+	}
+}
diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h
new file mode 100644
index 000000000000..9f59d8e710a3
--- /dev/null
+++ b/tools/perf/util/rlimit.h
@@ -0,0 +1,6 @@
+#ifndef __PERF_RLIMIT_H_
+#define __PERF_RLIMIT_H_
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+void rlimit__bump_memlock(void);
+#endif // __PERF_RLIMIT_H_
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 112bed65232f..25dc1d765553 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -113,6 +113,7 @@ struct tables {
 	PyObject		*call_path_handler;
 	PyObject		*call_return_handler;
 	PyObject		*synth_handler;
+	PyObject		*context_switch_handler;
 	bool			db_export_mode;
 };
 
@@ -1011,15 +1012,19 @@ static int python_export_thread(struct db_export *dbe, struct thread *thread,
 	return 0;
 }
 
-static int python_export_comm(struct db_export *dbe, struct comm *comm)
+static int python_export_comm(struct db_export *dbe, struct comm *comm,
+			      struct thread *thread)
 {
 	struct tables *tables = container_of(dbe, struct tables, dbe);
 	PyObject *t;
 
-	t = tuple_new(2);
+	t = tuple_new(5);
 
 	tuple_set_u64(t, 0, comm->db_id);
 	tuple_set_string(t, 1, comm__str(comm));
+	tuple_set_u64(t, 2, thread->db_id);
+	tuple_set_u64(t, 3, comm->start);
+	tuple_set_s32(t, 4, comm->exec);
 
 	call_object(tables->comm_handler, t, "comm_table");
 
@@ -1233,6 +1238,34 @@ static int python_export_call_return(struct db_export *dbe,
 	return 0;
 }
 
+static int python_export_context_switch(struct db_export *dbe, u64 db_id,
+					struct machine *machine,
+					struct perf_sample *sample,
+					u64 th_out_id, u64 comm_out_id,
+					u64 th_in_id, u64 comm_in_id, int flags)
+{
+	struct tables *tables = container_of(dbe, struct tables, dbe);
+	PyObject *t;
+
+	t = tuple_new(9);
+
+	tuple_set_u64(t, 0, db_id);
+	tuple_set_u64(t, 1, machine->db_id);
+	tuple_set_u64(t, 2, sample->time);
+	tuple_set_s32(t, 3, sample->cpu);
+	tuple_set_u64(t, 4, th_out_id);
+	tuple_set_u64(t, 5, comm_out_id);
+	tuple_set_u64(t, 6, th_in_id);
+	tuple_set_u64(t, 7, comm_in_id);
+	tuple_set_s32(t, 8, flags);
+
+	call_object(tables->context_switch_handler, t, "context_switch");
+
+	Py_DECREF(t);
+
+	return 0;
+}
+
 static int python_process_call_return(struct call_return *cr, u64 *parent_db_id,
 				      void *data)
 {
@@ -1296,6 +1329,16 @@ static void python_process_event(union perf_event *event,
 	}
 }
 
+static void python_process_switch(union perf_event *event,
+				  struct perf_sample *sample,
+				  struct machine *machine)
+{
+	struct tables *tables = &tables_global;
+
+	if (tables->db_export_mode)
+		db_export__switch(&tables->dbe, event, sample, machine);
+}
+
 static void get_handler_name(char *str, size_t size,
 			     struct perf_evsel *evsel)
 {
@@ -1511,6 +1554,7 @@ static void set_table_handlers(struct tables *tables)
 	SET_TABLE_HANDLER(sample);
 	SET_TABLE_HANDLER(call_path);
 	SET_TABLE_HANDLER(call_return);
+	SET_TABLE_HANDLER(context_switch);
 
 	/*
 	 * Synthesized events are samples but with architecture-specific data
@@ -1620,9 +1664,7 @@ error:
 
 static int python_flush_script(void)
 {
-	struct tables *tables = &tables_global;
-
-	return db_export__flush(&tables->dbe);
+	return 0;
 }
 
 /*
@@ -1831,6 +1873,7 @@ struct scripting_ops python_scripting_ops = {
 	.flush_script		= python_flush_script,
 	.stop_script		= python_stop_script,
 	.process_event		= python_process_event,
+	.process_switch		= python_process_switch,
 	.process_stat		= python_process_stat,
 	.process_stat_interval	= python_process_stat_interval,
 	.generate_script	= python_generate_script,
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index d9b0a942090a..c7002fe11673 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -81,6 +81,9 @@ struct scripting_ops {
 			       struct perf_sample *sample,
 			       struct perf_evsel *evsel,
 			       struct addr_location *al);
+	void (*process_switch)(union perf_event *event,
+			       struct perf_sample *sample,
+			       struct machine *machine);
 	void (*process_stat)(struct perf_stat_config *config,
 			     struct perf_evsel *evsel, u64 tstamp);
 	void (*process_stat_interval)(u64 tstamp);
diff --git a/tools/power/cpupower/debug/kernel/Makefile b/tools/power/cpupower/debug/kernel/Makefile
index c23e5a6ceb7e..7b5c43684be1 100644
--- a/tools/power/cpupower/debug/kernel/Makefile
+++ b/tools/power/cpupower/debug/kernel/Makefile
@@ -12,8 +12,8 @@ default:
 	$(MAKE) -C $(KDIR) M=$(CURDIR)
 
 clean:
-	- rm -rf *.o *.ko .tmp-versions .*.cmd .*.mod.* *.mod.c
-	- rm -rf .tmp_versions* Module.symvers modules.order
+	- rm -rf *.o *.ko .*.cmd .*.mod.* *.mod.c
+	- rm -rf Module.symvers modules.order
 
 install: default
 	install -d $(KMISC)
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
index 72525426654b..6fd864935319 100755
--- a/tools/testing/ktest/config-bisect.pl
+++ b/tools/testing/ktest/config-bisect.pl
@@ -663,7 +663,7 @@ while ($#ARGV >= 0) {
     }
 
     else {
-	die "Unknow option $opt\n";
+	die "Unknown option $opt\n";
     }
 }
 
@@ -732,7 +732,7 @@ if ($start) {
 	}
     }
     run_command "cp $good_start $good" or die "failed to copy to $good\n";
-    run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+    run_command "cp $bad_start $bad" or die "failed to copy to $bad\n";
 } else {
     if ( ! -f $good ) {
 	die "Can not find file $good\n";
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 62afd0b43074..ba7849751989 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -10,11 +10,11 @@ UNAME_M := $(shell uname -m)
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
 LIBKVM_aarch64 = lib/aarch64/processor.c
+LIBKVM_s390x = lib/s390x/processor.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -26,9 +26,14 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+
+TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -43,7 +48,12 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
 no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
         $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
 
-LDFLAGS += -pthread $(no-pie-option)
+# On s390, build the testcases KVM-enabled
+pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
+	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 
 # After inclusion, $(OUTPUT) is defined and
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 00235f5932f0..e0e66b115ef2 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -41,6 +41,12 @@ enum vm_guest_mode {
 	NUM_VM_MODES,
 };
 
+#ifdef __aarch64__
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#else
+#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#endif
+
 #define vm_guest_mode_string(m) vm_guest_mode_string[m]
 extern const char * const vm_guest_mode_string[];
 
@@ -111,10 +117,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
 		    struct kvm_sregs *sregs);
 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
 		    struct kvm_sregs *sregs);
+#ifdef __KVM_HAVE_VCPU_EVENTS
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_vcpu_events *events);
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_vcpu_events *events);
+#endif
 #ifdef __x86_64__
 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
 			   struct kvm_nested_state *state);
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
new file mode 100644
index 000000000000..e0e96a5f608c
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/processor.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN	~0xfffUL /* region/segment table origin	   */
+#define REGION_ENTRY_PROTECT	0x200	 /* region protection bit	   */
+#define REGION_ENTRY_NOEXEC	0x100	 /* region no-execute bit	   */
+#define REGION_ENTRY_OFFSET	0xc0	 /* region table offset		   */
+#define REGION_ENTRY_INVALID	0x20	 /* invalid region table entry	   */
+#define REGION_ENTRY_TYPE	0x0c	 /* region/segment table type mask */
+#define REGION_ENTRY_LENGTH	0x03	 /* region third length		   */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID	0x400		/* HW invalid bit    */
+#define PAGE_PROTECT	0x200		/* HW read-only bit  */
+#define PAGE_NOEXEC	0x100		/* HW no-execute bit */
+
+#endif
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 429226bc6a92..231d79e57774 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -27,7 +27,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
 	printf("Testing creating %d vCPUs, with IDs %d...%d.\n",
 	       num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
 
-	vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 
 	for (i = 0; i < num_vcpus; i++) {
 		int vcpu_id = first_vcpu_id + i;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index af2023d818a5..486400a97374 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -227,7 +227,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
 	struct kvm_vm *vm;
 
-	vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
 
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 	vm_vcpu_add_default(vm, vcpuid, guest_code);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 221e3fa46680..6e49bb039376 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -556,6 +556,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	int ret;
 	struct userspace_mem_region *region;
 	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+	size_t alignment;
 
 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
 		"address not on a page boundary.\n"
@@ -605,9 +606,20 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	TEST_ASSERT(region != NULL, "Insufficient Memory");
 	region->mmap_size = npages * vm->page_size;
 
-	/* Enough memory to align up to a huge page. */
+#ifdef __s390x__
+	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
+	alignment = 0x100000;
+#else
+	alignment = 1;
+#endif
+
 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
-		region->mmap_size += huge_page_size;
+		alignment = max(huge_page_size, alignment);
+
+	/* Add enough memory to align up if necessary */
+	if (alignment > 1)
+		region->mmap_size += alignment;
+
 	region->mmap_start = mmap(NULL, region->mmap_size,
 				  PROT_READ | PROT_WRITE,
 				  MAP_PRIVATE | MAP_ANONYMOUS
@@ -617,9 +629,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		    "test_malloc failed, mmap_start: %p errno: %i",
 		    region->mmap_start, errno);
 
-	/* Align THP allocation up to start of a huge page. */
-	region->host_mem = align(region->mmap_start,
-				 src_type == VM_MEM_SRC_ANONYMOUS_THP ?  huge_page_size : 1);
+	/* Align host address */
+	region->host_mem = align(region->mmap_start, alignment);
 
 	/* As needed perform madvise */
 	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
@@ -1218,6 +1229,7 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 		ret, errno);
 }
 
+#ifdef __KVM_HAVE_VCPU_EVENTS
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_vcpu_events *events)
 {
@@ -1243,6 +1255,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		ret, errno);
 }
+#endif
 
 #ifdef __x86_64__
 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
new file mode 100644
index 000000000000..32a02360b1eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM selftest s390x library code - CPU-related functions (page tables...)
+ *
+ * Copyright (C) 2019, Red Hat, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "processor.h"
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
+
+#define PAGES_PER_REGION 4
+
+/* Allocate the top-level region table once and mark all entries invalid. */
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+{
+	vm_paddr_t table_gpa;
+
+	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+		    vm->page_size);
+
+	if (!vm->pgd_created) {
+		table_gpa = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+				KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+		memset(addr_gpa2hva(vm, table_gpa), 0xff,
+		       PAGES_PER_REGION * vm->page_size);
+		vm->pgd = table_gpa;
+		vm->pgd_created = true;
+	}
+}
+
+/*
+ * Allocate 4 pages for a region/segment table (ri < 4), or one page for
+ * a page table (ri == 4), and fill them with 0xff so that every entry
+ * starts out invalid. Returns a table entry pointing to the new pages.
+ */
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+{
+	uint64_t taddr, nr_pages = ri < 4 ? PAGES_PER_REGION : 1;
+
+	taddr = vm_phy_pages_alloc(vm, nr_pages,
+				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+	memset(addr_gpa2hva(vm, taddr), 0xff, nr_pages * vm->page_size);
+
+	return (taddr & REGION_ENTRY_ORIGIN)
+		| (((4 - ri) << 2) & REGION_ENTRY_TYPE)
+		| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM Virtual Address
+ *   gpa - VM Physical Address
+ *   memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at gva to the page starting at gpa.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
+		 uint32_t memslot)
+{
+	int ri, idx;
+	uint64_t *entry;
+
+	TEST_ASSERT((gva % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x",
+		gva, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(gva >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		gva);
+	TEST_ASSERT((gpa % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x",
+		gpa, vm->page_size);
+	TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		gpa, vm->max_gfn, vm->page_size);
+
+	/* Walk through region and segment tables, creating missing levels */
+	entry = addr_gpa2hva(vm, vm->pgd);
+	for (ri = 1; ri <= 4; ri++) {
+		idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
+		if (entry[idx] & REGION_ENTRY_INVALID)
+			entry[idx] = virt_alloc_region(vm, ri, memslot);
+		entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+	}
+
+	/* Fill in page table entry */
+	idx = (gva >> 12) & 0x0ffu;		/* page index */
+	if (!(entry[idx] & PAGE_INVALID))
+		fprintf(stderr,
+			"WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
+	entry[idx] = gpa;
+}
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address by walking the guest's region, segment and page tables,
+ * starting at vm->pgd, within the VM given by vm.  The page frame
+ * address found in the page table entry is combined with the offset
+ * of gva within its 4k page and returned.
+ * A TEST_ASSERT failure occurs if any table entry on the way is
+ * marked invalid, i.e. if no mapping for gva exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	int shift, idx;
+	uint64_t *entry;
+
+	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+		    vm->page_size);
+
+	/* Follow the region and segment tables, 11 address bits per level */
+	entry = addr_gpa2hva(vm, vm->pgd);
+	for (shift = 53; shift >= 20; shift -= 11) {
+		idx = (gva >> shift) & 0x7ffu;
+		TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
+			    "No region mapping for vm virtual address 0x%lx",
+			    gva);
+		entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
+	}
+
+	idx = (gva >> 12) & 0x0ffu;		/* page index */
+	TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
+		    "No page mapping for vm virtual address 0x%lx", gva);
+
+	return (entry[idx] & ~0xffful) + (gva & 0xffful);
+}
+
+/* Print every valid entry of the 256-entry page table at ptea_start. */
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+			   uint64_t ptea_start)
+{
+	uint64_t *pte, ptea, end = ptea_start + 0x100 * 8;
+
+	for (ptea = ptea_start; ptea < end; ptea += 8) {
+		pte = addr_gpa2hva(vm, ptea);
+		if (!(*pte & PAGE_INVALID))
+			fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
+				indent, "", ptea, *pte);
+	}
+}
+
+/* Recursively dump the valid entries of a 2048-entry region/segment table */
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+			     uint64_t reg_tab_addr)
+{
+	uint64_t addr, *entry;
+
+	for (addr = reg_tab_addr; addr < reg_tab_addr + 0x800 * 8; addr += 8) {
+		entry = addr_gpa2hva(vm, addr);
+		if (*entry & REGION_ENTRY_INVALID)
+			continue;
+		fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
+			indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
+			addr, *entry);
+		if (*entry & REGION_ENTRY_TYPE)
+			virt_dump_region(stream, vm, indent + 2,
+					 *entry & REGION_ENTRY_ORIGIN);
+		else	/* type 0: the entry points to a page table */
+			virt_dump_ptes(stream, vm, indent + 2,
+				       *entry & REGION_ENTRY_ORIGIN);
+	}
+}
+
+/* Dump the guest page tables to stream, if any have been created. */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	/* Nothing to walk until the top-level table exists */
+	if (vm->pgd_created)
+		virt_dump_region(stream, vm, indent, vm->pgd);
+}
+
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ *   vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The size of extra memories to add (this will
+ *                     decide how much extra space we will need to
+ *                     setup the page tables using mem slot 0)
+ *   guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	struct kvm_vm *vm;
+	uint64_t nr_pages;
+
+	/*
+	 * Page tables for n extra pages need about
+	 * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
+	 * pages, which is definitely smaller than (n / 256) * 2.
+	 */
+	nr_pages = DEFAULT_GUEST_PHY_PAGES + extra_mem_pages / 256 * 2;
+	vm = vm_create(VM_MODE_DEFAULT, nr_pages, O_RDWR);
+
+	/* Load this test binary into the guest, then set up the vCPU */
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+	return vm;
+}
+
+/*
+ * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW)
+ *
+ * Input Args:
+ *   vcpuid - The id of the VCPU to add to the VM.
+ *   guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	const size_t stack_bytes = DEFAULT_STACK_PGS * getpagesize();
+	struct kvm_sregs sregs;
+	struct kvm_regs regs;
+	struct kvm_run *run;
+	uint64_t stack_base;
+
+	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
+		    vm->page_size);
+
+	stack_base = vm_vaddr_alloc(vm, stack_bytes,
+				    DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+	vm_vcpu_add(vm, vcpuid);
+
+	/* gprs[15] = end of stack - 160 (presumably room for one ABI frame) */
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs.gprs[15] = stack_base + stack_bytes - 160;
+	vcpu_regs_set(vm, vcpuid, &regs);
+
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs.crs[0] |= 0x00040000;		/* Enable floating point regs */
+	sregs.crs[1] = vm->pgd | 0xf;		/* Primary region table */
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+
+	run = vcpu_state(vm, vcpuid);
+	run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
+	run->psw_addr = (uintptr_t)guest_code;
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpuid);
+	/* Dump the PSW of the vCPU asked for, not of the head of the list */
+	fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
+		indent, "", run->psw_mask, run->psw_addr);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index b430f962e323..6cb34a0fa200 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -821,7 +821,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
 	/* Create VM */
-	vm = vm_create(VM_MODE_P52V48_4K,
+	vm = vm_create(VM_MODE_DEFAULT,
 		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
 		       O_RDWR);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index fe56d159d65f..204f847bd065 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Google LLC.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
new file mode 100644
index 000000000000..e85ff0d69548
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_CAP_SYNC_REGS
+ *
+ * Based on the same test for x86:
+ * Copyright (C) 2018, Google LLC.
+ *
+ * Adaptions for s390x:
+ * Copyright (C) 2019, Red Hat, Inc.
+ *
+ * Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID 5
+
+static void guest_code(void)
+{
+	/* Exit to the host with diag 0x501, then increment r11 on re-entry */
+	for (;;)
+		asm volatile ("diag 0,0,0x501\n"
+			      "ahi 11,1" : : : "11", "cc");
+}
+
+/* Widen for printing: acrs[] is 32 bit wide, gprs[] and crs[] are 64 bit */
+#define REG_COMPARE(reg) \
+	TEST_ASSERT(left->reg == right->reg, \
+		    "Register " #reg " values did not match: 0x%llx, 0x%llx\n", \
+		    (unsigned long long)left->reg, (unsigned long long)right->reg)
+
+static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
+{
+	int i;
+
+	for (i = 0; i < 16; i++)
+		REG_COMPARE(gprs[i]);
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
+{
+	int i;
+
+	for (i = 0; i < 16; i++)
+		REG_COMPARE(acrs[i]);
+
+	for (i = 0; i < 16; i++)
+		REG_COMPARE(crs[i]);
+}
+
+#undef REG_COMPARE
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	int rv, cap;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+	if (!cap) {
+		fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Request and verify all valid register sets. */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
+		    (run->s390_sieic.ipa >> 8) == 0x83 &&
+		    (run->s390_sieic.ipb >> 16) == 0x501,
+		    "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+		    run->s390_sieic.icptcode, run->s390_sieic.ipa,
+		    run->s390_sieic.ipb);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs);
+
+	/* Set and verify various register values */
+	run->s.regs.gprs[11] = 0xBAD1DEA;
+	run->s.regs.acrs[0] = 1 << 11;
+
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.gprs[11]);
+	TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
+		    "acr0 sync regs value incorrect 0x%x.",
+		    run->s.regs.acrs[0]);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs);
+
+	/* Clear kvm_dirty_regs bits, verify new s.regs values are
+	 * overwritten with existing guest values.
+	 */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.gprs[11] = 0xDEADBEEF;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "Unexpected exit reason: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.gprs[11]);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 8a20e03d4cb7..9c60337317c6 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -78,10 +78,10 @@ set -e
 
 function _modprobe()
 {
-	modprobe "$@"
+	modprobe "$@" || return 1
 
 	if [[ "$REMOTE_HOST" != "" ]]; then
-		ssh "$REMOTE_HOST" modprobe "$@"
+		ssh "$REMOTE_HOST" modprobe "$@" || return 1
 	fi
 }
 
@@ -442,6 +442,30 @@ function pingpong_test()
 	echo "  Passed"
 }
 
+function msi_test()
+{
+	LOC=$1
+	REM=$2
+	# $1 is the triggering peer directory, $2 the peer expected to see the IRQ
+	write_file 1 $LOC/ready
+
+	echo "Running MSI interrupt tests on: $(subdirname $LOC) / $(subdirname $REM)"
+	# Trigger each of the "count" interrupts once from the LOC side
+	CNT=$(read_file "$LOC/count")
+	for ((i = 0; i < $CNT; i++)); do
+		START=$(read_file $REM/../irq${i}_occurrences)
+		write_file $i $LOC/trigger
+		END=$(read_file $REM/../irq${i}_occurrences)
+		# The occurrence counter on the REM side must rise by exactly 1
+		if [[ $(($END - $START)) != 1 ]]; then
+			echo "MSI did not trigger the interrupt on the remote side!" >&2
+			exit 1
+		fi
+	done
+
+	echo "  Passed"
+}
+
 function perf_test()
 {
 	USE_DMA=$1
@@ -520,6 +544,29 @@ function ntb_pingpong_tests()
 	_modprobe -r ntb_pingpong
 }
 
+function ntb_msi_tests()
+{
+	LOCAL_MSI="$DEBUGFS/ntb_msi_test/$LOCAL_DEV"
+	REMOTE_MSI="$REMOTE_HOST:$DEBUGFS/ntb_msi_test/$REMOTE_DEV"
+
+	echo "Starting ntb_msi_test tests..."
+	# The module is optional: skip these tests when it cannot be loaded
+	if ! _modprobe ntb_msi_test 2> /dev/null; then
+		echo "  Not doing MSI tests seeing the module is not available."
+		return
+	fi
+
+	port_test $LOCAL_MSI $REMOTE_MSI
+
+	LOCAL_PEER="$LOCAL_MSI/peer$LOCAL_PIDX"
+	REMOTE_PEER="$REMOTE_MSI/peer$REMOTE_PIDX"
+	# Run the interrupt test in both directions
+	msi_test $LOCAL_PEER $REMOTE_PEER
+	msi_test $REMOTE_PEER $LOCAL_PEER
+
+	_modprobe -r ntb_msi_test
+}
+
 function ntb_perf_tests()
 {
 	LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
@@ -541,6 +588,7 @@ function cleanup()
 	_modprobe -r ntb_perf 2> /dev/null
 	_modprobe -r ntb_pingpong 2> /dev/null
 	_modprobe -r ntb_transport 2> /dev/null
+	_modprobe -r ntb_msi_test 2> /dev/null
 	set -e
 }
 
@@ -577,5 +625,7 @@ ntb_tool_tests
 echo
 ntb_pingpong_tests
 echo
+ntb_msi_tests
+echo
 ntb_perf_tests
 echo
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5ab4c60c100e..15a329da59fa 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -489,25 +489,11 @@ static void test_ptrace_write_gsbase(void)
 		 * selector value is changed or not by the GSBASE write in
 		 * a ptracer.
 		 */
-		if (gs != *shared_scratch) {
-			nerrs++;
-			printf("[FAIL]\tGS changed to %lx\n", gs);
-
-			/*
-			 * On older kernels, poking a nonzero value into the
-			 * base would zero the selector.  On newer kernels,
-			 * this behavior has changed -- poking the base
-			 * changes only the base and, if FSGSBASE is not
-			 * available, this may have no effect.
-			 */
-			if (gs == 0)
-				printf("\tNote: this is expected behavior on older kernels.\n");
-		} else if (have_fsgsbase && (base != 0xFF)) {
-			nerrs++;
-			printf("[FAIL]\tGSBASE changed to %lx\n", base);
+		if (gs == 0 && base == 0xFF) {
+			printf("[OK]\tGS was reset as expected\n");
 		} else {
-			printf("[OK]\tGS remained 0x%hx%s", *shared_scratch, have_fsgsbase ? " and GSBASE changed to 0xFF" : "");
-			printf("\n");
+			nerrs++;
+			printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
 		}
 	}
 
diff --git a/usr/include/Makefile b/usr/include/Makefile
index cd8daa20d487..aa316d99e035 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -30,8 +30,6 @@ header-test-$(CONFIG_CPU_BIG_ENDIAN) += linux/byteorder/big_endian.h
 header-test-$(CONFIG_CPU_LITTLE_ENDIAN) += linux/byteorder/little_endian.h
 header-test- += linux/coda.h
 header-test- += linux/coda_psdev.h
-header-test- += linux/dvb/audio.h
-header-test- += linux/dvb/osd.h
 header-test- += linux/elfcore.h
 header-test- += linux/errqueue.h
 header-test- += linux/fsmap.h
@@ -44,7 +42,6 @@ header-test- += linux/netfilter_bridge/ebtables.h
 header-test- += linux/netfilter_ipv4/ipt_LOG.h
 header-test- += linux/netfilter_ipv6/ip6t_LOG.h
 header-test- += linux/nfc.h
-header-test- += linux/nilfs2_ondisk.h
 header-test- += linux/omap3isp.h
 header-test- += linux/omapfb.h
 header-test- += linux/patchkey.h
@@ -59,9 +56,6 @@ header-test- += linux/v4l2-mediabus.h
 header-test- += linux/v4l2-subdev.h
 header-test- += linux/videodev2.h
 header-test- += linux/vm_sockets.h
-header-test- += misc/ocxl.h
-header-test- += mtd/mtd-abi.h
-header-test- += mtd/mtd-user.h
 header-test- += scsi/scsi_bsg_fc.h
 header-test- += scsi/scsi_netlink.h
 header-test- += scsi/scsi_netlink_fc.h
@@ -108,7 +102,6 @@ header-test- += linux/bpf_perf_event.h
 endif
 
 ifeq ($(SRCARCH),s390)
-header-test- += asm/runtime_instr.h
 header-test- += asm/zcrypt.h
 endif
 
@@ -116,7 +109,6 @@ ifeq ($(SRCARCH),sparc)
 header-test- += asm/stat.h
 header-test- += asm/uctx.h
 header-test- += asm/fbio.h
-header-test- += asm/openpromio.h
 endif
 
 # asm-generic/*.h is used by asm/*.h, and should not be included directly
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f645c0fbf7ec..acc43242a310 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -727,7 +727,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
-		 * Documentation/virtual/kvm/vcpu-requests.rst
+		 * Documentation/virt/kvm/vcpu-requests.rst
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 936962abc38d..c45e2d7e942f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -250,7 +250,7 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
 	 * pending state of interrupt is latched in pending_latch variable.
 	 * Userspace will save and restore pending state and line_level
 	 * separately.
-	 * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+	 * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt
 	 * for handling of ISPENDR and ICPENDR.
 	 */
 	for (i = 0; i < len * 8; i++) {
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 57205beaa981..3b7525deec80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -42,7 +42,7 @@
 			    VGIC_AFFINITY_LEVEL(val, 3))
 
 /*
- * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt,
  * below macros are defined for CPUREG encoding.
  */
 #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK   0x000000000000c000
@@ -63,7 +63,7 @@
 				      KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
 
 /*
- * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-its.txt,
  * below macros are defined for ITS table entry encoding.
  */
 #define KVM_ITS_CTE_VALID_SHIFT		63
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b4ab59dd6846..887f3b0c2b60 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -314,6 +314,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
+	vcpu->ready = false;
 
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
@@ -2387,6 +2388,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
 		swake_up_one(wqp);
+		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
@@ -2500,7 +2502,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			} else if (pass && i > last_boosted_vcpu)
 				break;
-			if (!READ_ONCE(vcpu->preempted))
+			if (!READ_ONCE(vcpu->ready))
 				continue;
 			if (vcpu == me)
 				continue;
@@ -4203,8 +4205,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-	if (vcpu->preempted)
-		vcpu->preempted = false;
+	vcpu->preempted = false;
+	WRITE_ONCE(vcpu->ready, false);
 
 	kvm_arch_sched_in(vcpu, cpu);
 
@@ -4216,8 +4218,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-	if (current->state == TASK_RUNNING)
+	if (current->state == TASK_RUNNING) {
 		vcpu->preempted = true;
+		WRITE_ONCE(vcpu->ready, true);
+	}
 	kvm_arch_vcpu_put(vcpu);
 }