summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/s390/index.rst1
-rw-r--r--Documentation/s390/vfio-ap-locking.rst115
-rw-r--r--Documentation/s390/vfio-ap.rst498
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/s390/boot/startup.c10
-rw-r--r--arch/s390/boot/uv.c5
-rw-r--r--arch/s390/boot/uv.h7
-rw-r--r--arch/s390/crypto/aes_s390.c2
-rw-r--r--arch/s390/crypto/chacha-glue.c2
-rw-r--r--arch/s390/crypto/crc32-vx.c2
-rw-r--r--arch/s390/crypto/des_s390.c2
-rw-r--r--arch/s390/crypto/ghash_s390.c2
-rw-r--r--arch/s390/crypto/prng.c2
-rw-r--r--arch/s390/crypto/sha1_s390.c2
-rw-r--r--arch/s390/crypto/sha256_s390.c2
-rw-r--r--arch/s390/crypto/sha3_256_s390.c2
-rw-r--r--arch/s390/crypto/sha3_512_s390.c2
-rw-r--r--arch/s390/crypto/sha512_s390.c2
-rw-r--r--arch/s390/include/asm/cpufeature.h23
-rw-r--r--arch/s390/include/asm/mmu.h14
-rw-r--r--arch/s390/include/asm/os_info.h17
-rw-r--r--arch/s390/include/asm/sclp.h4
-rw-r--r--arch/s390/include/asm/uaccess.h1
-rw-r--r--arch/s390/include/asm/unwind.h2
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/cpufeature.c46
-rw-r--r--arch/s390/kernel/crash_dump.c116
-rw-r--r--arch/s390/kernel/nmi.c8
-rw-r--r--arch/s390/kernel/processor.c10
-rw-r--r--arch/s390/kernel/setup.c13
-rw-r--r--arch/s390/mm/maccess.c26
-rw-r--r--drivers/char/hw_random/s390-trng.c2
-rw-r--r--drivers/s390/char/Kconfig2
-rw-r--r--drivers/s390/char/tape_34xx.c2
-rw-r--r--drivers/s390/char/uvdevice.c5
-rw-r--r--drivers/s390/char/zcore.c55
-rw-r--r--drivers/s390/crypto/ap_bus.c31
-rw-r--r--drivers/s390/crypto/pkey_api.c2
-rw-r--r--drivers/s390/crypto/vfio_ap_drv.c124
-rw-r--r--drivers/s390/crypto/vfio_ap_ops.c1441
-rw-r--r--drivers/s390/crypto/vfio_ap_private.h47
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--tools/testing/crypto/chacha20-s390/test-cipher.c9
43 files changed, 1842 insertions, 821 deletions
diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst
index b10ca9192557..73c79bf586fd 100644
--- a/Documentation/s390/index.rst
+++ b/Documentation/s390/index.rst
@@ -12,6 +12,7 @@ s390 Architecture
qeth
s390dbf
vfio-ap
+ vfio-ap-locking
vfio-ccw
zfcpdump
common_io
diff --git a/Documentation/s390/vfio-ap-locking.rst b/Documentation/s390/vfio-ap-locking.rst
new file mode 100644
index 000000000000..0dfcdb562e21
--- /dev/null
+++ b/Documentation/s390/vfio-ap-locking.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+VFIO AP Locks Overview
+======================
+This document describes the locks that are pertinent to the secure operation
+of the vfio_ap device driver. Throughout this document, the following variables
+will be used to denote instances of the structures herein described:
+
+.. code-block:: c
+
+ struct ap_matrix_dev *matrix_dev;
+ struct ap_matrix_mdev *matrix_mdev;
+ struct kvm *kvm;
+
+The Matrix Devices Lock (drivers/s390/crypto/vfio_ap_private.h)
+---------------------------------------------------------------
+
+.. code-block:: c
+
+ struct ap_matrix_dev {
+ ...
+ struct list_head mdev_list;
+ struct mutex mdevs_lock;
+ ...
+ }
+
+The Matrix Devices Lock (matrix_dev->mdevs_lock) is implemented as a global
+mutex contained within the single object of struct ap_matrix_dev. This lock
+controls access to all fields contained within each matrix_mdev
+(matrix_dev->mdev_list). This lock must be held while reading from, writing to
+or using the data from a field contained within a matrix_mdev instance
+representing one of the vfio_ap device driver's mediated devices.
+
+The KVM Lock (include/linux/kvm_host.h)
+---------------------------------------
+
+.. code-block:: c
+
+ struct kvm {
+ ...
+ struct mutex lock;
+ ...
+ }
+
+The KVM Lock (kvm->lock) controls access to the state data for a KVM guest. This
+lock must be held by the vfio_ap device driver while one or more AP adapters,
+domains or control domains are being plugged into or unplugged from the guest.
+
+The KVM pointer is stored in the in the matrix_mdev instance
+(matrix_mdev->kvm = kvm) containing the state of the mediated device that has
+been attached to the KVM guest.
+
+The Guests Lock (drivers/s390/crypto/vfio_ap_private.h)
+-----------------------------------------------------------
+
+.. code-block:: c
+
+ struct ap_matrix_dev {
+ ...
+ struct list_head mdev_list;
+ struct mutex guests_lock;
+ ...
+ }
+
+The Guests Lock (matrix_dev->guests_lock) controls access to the
+matrix_mdev instances (matrix_dev->mdev_list) that represent mediated devices
+that hold the state for the mediated devices that have been attached to a
+KVM guest. This lock must be held:
+
+1. To control access to the KVM pointer (matrix_mdev->kvm) while the vfio_ap
+ device driver is using it to plug/unplug AP devices passed through to the KVM
+ guest.
+
+2. To add matrix_mdev instances to or remove them from matrix_dev->mdev_list.
+ This is necessary to ensure the proper locking order when the list is perused
+ to find an ap_matrix_mdev instance for the purpose of plugging/unplugging
+ AP devices passed through to a KVM guest.
+
+ For example, when a queue device is removed from the vfio_ap device driver,
+ if the adapter is passed through to a KVM guest, it will have to be
+ unplugged. In order to figure out whether the adapter is passed through,
+ the matrix_mdev object to which the queue is assigned will have to be
+ found. The KVM pointer (matrix_mdev->kvm) can then be used to determine if
+ the mediated device is passed through (matrix_mdev->kvm != NULL) and if so,
+ to unplug the adapter.
+
+It is not necessary to take the Guests Lock to access the KVM pointer if the
+pointer is not used to plug/unplug devices passed through to the KVM guest;
+however, in this case, the Matrix Devices Lock (matrix_dev->mdevs_lock) must be
+held in order to access the KVM pointer since it is set and cleared under the
+protection of the Matrix Devices Lock. A case in point is the function that
+handles interception of the PQAP(AQIC) instruction sub-function. This handler
+needs to access the KVM pointer only for the purposes of setting or clearing IRQ
+resources, so only the matrix_dev->mdevs_lock needs to be held.
+
+The PQAP Hook Lock (arch/s390/include/asm/kvm_host.h)
+-----------------------------------------------------
+
+.. code-block:: c
+
+ typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
+
+ struct kvm_s390_crypto {
+ ...
+ struct rw_semaphore pqap_hook_rwsem;
+ crypto_hook *pqap_hook;
+ ...
+ };
+
+The PQAP Hook Lock is a r/w semaphore that controls access to the function
+pointer of the handler ``(*kvm->arch.crypto.pqap_hook)`` to invoke when the
+PQAP(AQIC) instruction sub-function is intercepted by the host. The lock must be
+held in write mode when pqap_hook value is set, and in read mode when the
+pqap_hook function is called.
diff --git a/Documentation/s390/vfio-ap.rst b/Documentation/s390/vfio-ap.rst
index f57ae621f33e..61a0a3c6c7b4 100644
--- a/Documentation/s390/vfio-ap.rst
+++ b/Documentation/s390/vfio-ap.rst
@@ -123,27 +123,24 @@ Let's now take a look at how AP instructions executed on a guest are interpreted
by the hardware.
A satellite control block called the Crypto Control Block (CRYCB) is attached to
-our main hardware virtualization control block. The CRYCB contains three fields
-to identify the adapters, usage domains and control domains assigned to the KVM
-guest:
+our main hardware virtualization control block. The CRYCB contains an AP Control
+Block (APCB) that has three fields to identify the adapters, usage domains and
+control domains assigned to the KVM guest:
* The AP Mask (APM) field is a bit mask that identifies the AP adapters assigned
- to the KVM guest. Each bit in the mask, from left to right (i.e. from most
- significant to least significant bit in big endian order), corresponds to
+ to the KVM guest. Each bit in the mask, from left to right, corresponds to
an APID from 0-255. If a bit is set, the corresponding adapter is valid for
use by the KVM guest.
* The AP Queue Mask (AQM) field is a bit mask identifying the AP usage domains
- assigned to the KVM guest. Each bit in the mask, from left to right (i.e. from
- most significant to least significant bit in big endian order), corresponds to
- an AP queue index (APQI) from 0-255. If a bit is set, the corresponding queue
- is valid for use by the KVM guest.
+ assigned to the KVM guest. Each bit in the mask, from left to right,
+ corresponds to an AP queue index (APQI) from 0-255. If a bit is set, the
+ corresponding queue is valid for use by the KVM guest.
* The AP Domain Mask field is a bit mask that identifies the AP control domains
assigned to the KVM guest. The ADM bit mask controls which domains can be
changed by an AP command-request message sent to a usage domain from the
- guest. Each bit in the mask, from left to right (i.e. from most significant to
- least significant bit in big endian order), corresponds to a domain from
+ guest. Each bit in the mask, from left to right, corresponds to a domain from
0-255. If a bit is set, the corresponding domain can be modified by an AP
command-request message sent to a usage domain.
@@ -151,10 +148,10 @@ If you recall from the description of an AP Queue, AP instructions include
an APQN to identify the AP queue to which an AP command-request message is to be
sent (NQAP and PQAP instructions), or from which a command-reply message is to
be received (DQAP instruction). The validity of an APQN is defined by the matrix
-calculated from the APM and AQM; it is the cross product of all assigned adapter
-numbers (APM) with all assigned queue indexes (AQM). For example, if adapters 1
-and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
-(2,5) and (2,6) will be valid for the guest.
+calculated from the APM and AQM; it is the Cartesian product of all assigned
+adapter numbers (APM) with all assigned queue indexes (AQM). For example, if
+adapters 1 and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs
+(1,5), (1,6), (2,5) and (2,6) will be valid for the guest.
The APQNs can provide secure key functionality - i.e., a private key is stored
on the adapter card for each of its domains - so each APQN must be assigned to
@@ -192,7 +189,7 @@ The design introduces three new objects:
1. AP matrix device
2. VFIO AP device driver (vfio_ap.ko)
-3. VFIO AP mediated matrix pass-through device
+3. VFIO AP mediated pass-through device
The VFIO AP device driver
-------------------------
@@ -200,12 +197,13 @@ The VFIO AP (vfio_ap) device driver serves the following purposes:
1. Provides the interfaces to secure APQNs for exclusive use of KVM guests.
-2. Sets up the VFIO mediated device interfaces to manage a mediated matrix
+2. Sets up the VFIO mediated device interfaces to manage a vfio_ap mediated
device and creates the sysfs interfaces for assigning adapters, usage
domains, and control domains comprising the matrix for a KVM guest.
-3. Configures the APM, AQM and ADM in the CRYCB referenced by a KVM guest's
- SIE state description to grant the guest access to a matrix of AP devices
+3. Configures the APM, AQM and ADM in the APCB contained in the CRYCB referenced
+ by a KVM guest's SIE state description to grant the guest access to a matrix
+ of AP devices
Reserve APQNs for exclusive use of KVM guests
---------------------------------------------
@@ -235,10 +233,10 @@ reserved::
| | 8 probe | |
+--------^---------+ +--^--^------------+
6 edit | | |
- apmask | +-----------------------------+ | 9 mdev create
+ apmask | +-----------------------------+ | 11 mdev create
aqmask | | 1 modprobe |
+--------+-----+---+ +----------------+-+ +----------------+
- | | | |8 create | mediated |
+ | | | |10 create| mediated |
| admin | | VFIO device core |---------> matrix |
| + | | | device |
+------+-+---------+ +--------^---------+ +--------^-------+
@@ -246,14 +244,14 @@ reserved::
| | 9 create vfio_ap-passthrough | |
| +------------------------------+ |
+-------------------------------------------------------------+
- 10 assign adapter/domain/control domain
+ 12 assign adapter/domain/control domain
The process for reserving an AP queue for use by a KVM guest is:
1. The administrator loads the vfio_ap device driver
2. The vfio-ap driver during its initialization will register a single 'matrix'
device with the device core. This will serve as the parent device for
- all mediated matrix devices used to configure an AP matrix for a guest.
+ all vfio_ap mediated devices used to configure an AP matrix for a guest.
3. The /sys/devices/vfio_ap/matrix device is created by the device core
4. The vfio_ap device driver will register with the AP bus for AP queue devices
of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
@@ -269,24 +267,24 @@ The process for reserving an AP queue for use by a KVM guest is:
default zcrypt cex4queue driver.
8. The AP bus probes the vfio_ap device driver to bind the queues reserved for
it.
-9. The administrator creates a passthrough type mediated matrix device to be
+9. The administrator creates a passthrough type vfio_ap mediated device to be
used by a guest
10. The administrator assigns the adapters, usage domains and control domains
to be exclusively used by a guest.
Set up the VFIO mediated device interfaces
------------------------------------------
-The VFIO AP device driver utilizes the common interface of the VFIO mediated
+The VFIO AP device driver utilizes the common interfaces of the VFIO mediated
device core driver to:
-* Register an AP mediated bus driver to add a mediated matrix device to and
+* Register an AP mediated bus driver to add a vfio_ap mediated device to and
remove it from a VFIO group.
-* Create and destroy a mediated matrix device
-* Add a mediated matrix device to and remove it from the AP mediated bus driver
-* Add a mediated matrix device to and remove it from an IOMMU group
+* Create and destroy a vfio_ap mediated device
+* Add a vfio_ap mediated device to and remove it from the AP mediated bus driver
+* Add a vfio_ap mediated device to and remove it from an IOMMU group
The following high-level block diagram shows the main components and interfaces
-of the VFIO AP mediated matrix device driver::
+of the VFIO AP mediated device driver::
+-------------+
| |
@@ -343,7 +341,7 @@ matrix device.
* device_api:
the mediated device type's API
* available_instances:
- the number of mediated matrix passthrough devices
+ the number of vfio_ap mediated passthrough devices
that can be created
* device_api:
specifies the VFIO API
@@ -351,29 +349,37 @@ matrix device.
This attribute group identifies the user-defined sysfs attributes of the
mediated device. When a device is registered with the VFIO mediated device
framework, the sysfs attribute files identified in the 'mdev_attr_groups'
- structure will be created in the mediated matrix device's directory. The
- sysfs attributes for a mediated matrix device are:
+ structure will be created in the vfio_ap mediated device's directory. The
+ sysfs attributes for a vfio_ap mediated device are:
assign_adapter / unassign_adapter:
Write-only attributes for assigning/unassigning an AP adapter to/from the
- mediated matrix device. To assign/unassign an adapter, the APID of the
- adapter is echoed to the respective attribute file.
+ vfio_ap mediated device. To assign/unassign an adapter, the APID of the
+ adapter is echoed into the respective attribute file.
assign_domain / unassign_domain:
Write-only attributes for assigning/unassigning an AP usage domain to/from
- the mediated matrix device. To assign/unassign a domain, the domain
- number of the usage domain is echoed to the respective attribute
+ the vfio_ap mediated device. To assign/unassign a domain, the domain
+ number of the usage domain is echoed into the respective attribute
file.
matrix:
- A read-only file for displaying the APQNs derived from the cross product
- of the adapter and domain numbers assigned to the mediated matrix device.
+ A read-only file for displaying the APQNs derived from the Cartesian
+ product of the adapter and domain numbers assigned to the vfio_ap mediated
+ device.
+ guest_matrix:
+ A read-only file for displaying the APQNs derived from the Cartesian
+ product of the adapter and domain numbers assigned to the APM and AQM
+ fields respectively of the KVM guest's CRYCB. This may differ from the
+ the APQNs assigned to the vfio_ap mediated device if any APQN does not
+ reference a queue device bound to the vfio_ap device driver (i.e., the
+ queue is not in the host's AP configuration).
assign_control_domain / unassign_control_domain:
Write-only attributes for assigning/unassigning an AP control domain
- to/from the mediated matrix device. To assign/unassign a control domain,
- the ID of the domain to be assigned/unassigned is echoed to the respective
- attribute file.
+ to/from the vfio_ap mediated device. To assign/unassign a control domain,
+ the ID of the domain to be assigned/unassigned is echoed into the
+ respective attribute file.
control_domains:
A read-only file for displaying the control domain numbers assigned to the
- mediated matrix device.
+ vfio_ap mediated device.
* functions:
@@ -383,45 +389,75 @@ matrix device.
* Store the reference to the KVM structure for the guest using the mdev
* Store the AP matrix configuration for the adapters, domains, and control
domains assigned via the corresponding sysfs attributes files
+ * Store the AP matrix configuration for the adapters, domains and control
+ domains available to a guest. A guest may not be provided access to APQNs
+ referencing queue devices that do not exist, or are not bound to the
+ vfio_ap device driver.
remove:
- deallocates the mediated matrix device's ap_matrix_mdev structure. This will
- be allowed only if a running guest is not using the mdev.
+ deallocates the vfio_ap mediated device's ap_matrix_mdev structure.
+ This will be allowed only if a running guest is not using the mdev.
* callback interfaces
- open:
+ open_device:
The vfio_ap driver uses this callback to register a
- VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
- device. The open is invoked when QEMU connects the VFIO iommu group
- for the mdev matrix device to the MDEV bus. Access to the KVM structure used
- to configure the KVM guest is provided via this callback. The KVM structure,
- is used to configure the guest's access to the AP matrix defined via the
- mediated matrix device's sysfs attribute files.
- release:
+ VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the matrix mdev
+ devices. The open_device callback is invoked by userspace to connect the
+ VFIO iommu group for the matrix mdev device to the MDEV bus. Access to the
+ KVM structure used to configure the KVM guest is provided via this callback.
+ The KVM structure, is used to configure the guest's access to the AP matrix
+ defined via the vfio_ap mediated device's sysfs attribute files.
+
+ close_device:
unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
- mdev matrix device and deconfigures the guest's AP matrix.
+ matrix mdev device and deconfigures the guest's AP matrix.
-Configure the APM, AQM and ADM in the CRYCB
--------------------------------------------
-Configuring the AP matrix for a KVM guest will be performed when the
+ ioctl:
+ this callback handles the VFIO_DEVICE_GET_INFO and VFIO_DEVICE_RESET ioctls
+ defined by the vfio framework.
+
+Configure the guest's AP resources
+----------------------------------
+Configuring the AP resources for a KVM guest will be performed when the
VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
-function is called when QEMU connects to KVM. The guest's AP matrix is
-configured via it's CRYCB by:
+function is called when userspace connects to KVM. The guest's AP resources are
+configured via it's APCB by:
* Setting the bits in the APM corresponding to the APIDs assigned to the
- mediated matrix device via its 'assign_adapter' interface.
+ vfio_ap mediated device via its 'assign_adapter' interface.
* Setting the bits in the AQM corresponding to the domains assigned to the
- mediated matrix device via its 'assign_domain' interface.
+ vfio_ap mediated device via its 'assign_domain' interface.
* Setting the bits in the ADM corresponding to the domain dIDs assigned to the
- mediated matrix device via its 'assign_control_domains' interface.
+ vfio_ap mediated device via its 'assign_control_domains' interface.
+
+The linux device model precludes passing a device through to a KVM guest that
+is not bound to the device driver facilitating its pass-through. Consequently,
+an APQN that does not reference a queue device bound to the vfio_ap device
+driver will not be assigned to a KVM guest's matrix. The AP architecture,
+however, does not provide a means to filter individual APQNs from the guest's
+matrix, so the adapters, domains and control domains assigned to vfio_ap
+mediated device via its sysfs 'assign_adapter', 'assign_domain' and
+'assign_control_domain' interfaces will be filtered before providing the AP
+configuration to a guest:
+
+* The APIDs of the adapters, the APQIs of the domains and the domain numbers of
+ the control domains assigned to the matrix mdev that are not also assigned to
+ the host's AP configuration will be filtered.
+
+* Each APQN derived from the Cartesian product of the APIDs and APQIs assigned
+ to the vfio_ap mdev is examined and if any one of them does not reference a
+ queue device bound to the vfio_ap device driver, the adapter will not be
+ plugged into the guest (i.e., the bit corresponding to its APID will not be
+ set in the APM of the guest's APCB).
The CPU model features for AP
-----------------------------
-The AP stack relies on the presence of the AP instructions as well as two
-facilities: The AP Facilities Test (APFT) facility; and the AP Query
-Configuration Information (QCI) facility. These features/facilities are made
-available to a KVM guest via the following CPU model features:
+The AP stack relies on the presence of the AP instructions as well as three
+facilities: The AP Facilities Test (APFT) facility; the AP Query
+Configuration Information (QCI) facility; and the AP Queue Interruption Control
+facility. These features/facilities are made available to a KVM guest via the
+following CPU model features:
1. ap: Indicates whether the AP instructions are installed on the guest. This
feature will be enabled by KVM only if the AP instructions are installed
@@ -435,24 +471,28 @@ available to a KVM guest via the following CPU model features:
can be made available to the guest only if it is available on the host (i.e.,
facility bit 12 is set).
+4. apqi: Indicates AP Queue Interruption Control faclity is available on the
+ guest. This facility can be made available to the guest only if it is
+ available on the host (i.e., facility bit 65 is set).
+
Note: If the user chooses to specify a CPU model different than the 'host'
model to QEMU, the CPU model features and facilities need to be turned on
explicitly; for example::
- /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
+ /usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on,apqi=on
A guest can be precluded from using AP features/facilities by turning them off
explicitly; for example::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off,apqi=off
Note: If the APFT facility is turned off (apft=off) for the guest, the guest
-will not see any AP devices. The zcrypt device drivers that register for type 10
-and newer AP devices - i.e., the cex4card and cex4queue device drivers - need
-the APFT facility to ascertain the facilities installed on a given AP device. If
-the APFT facility is not installed on the guest, then the probe of device
-drivers will fail since only type 10 and newer devices can be configured for
-guest use.
+will not see any AP devices. The zcrypt device drivers on the guest that
+register for type 10 and newer AP devices - i.e., the cex4card and cex4queue
+device drivers - need the APFT facility to ascertain the facilities installed on
+a given AP device. If the APFT facility is not installed on the guest, then no
+adapter or domain devices will get created by the AP bus running on the
+guest because only type 10 and newer devices can be configured for guest use.
Example
=======
@@ -471,7 +511,7 @@ CARD.DOMAIN TYPE MODE
05.00ab CEX5C CCA-Coproc
06 CEX5A Accelerator
06.0004 CEX5A Accelerator
-06.00ab CEX5C CCA-Coproc
+06.00ab CEX5A Accelerator
=========== ===== ============
Guest2
@@ -479,9 +519,9 @@ Guest2
=========== ===== ============
CARD.DOMAIN TYPE MODE
=========== ===== ============
-05 CEX5A Accelerator
-05.0047 CEX5A Accelerator
-05.00ff CEX5A Accelerator
+05 CEX5C CCA-Coproc
+05.0047 CEX5C CCA-Coproc
+05.00ff CEX5C CCA-Coproc
=========== ===== ============
Guest3
@@ -529,40 +569,56 @@ These are the steps:
2. Secure the AP queues to be used by the three guests so that the host can not
access them. To secure them, there are two sysfs files that specify
- bitmasks marking a subset of the APQN range as 'usable by the default AP
- queue device drivers' or 'not usable by the default device drivers' and thus
- available for use by the vfio_ap device driver'. The location of the sysfs
- files containing the masks are::
+ bitmasks marking a subset of the APQN range as usable only by the default AP
+ queue device drivers. All remaining APQNs are available for use by
+ any other device driver. The vfio_ap device driver is currently the only
+ non-default device driver. The location of the sysfs files containing the
+ masks are::
/sys/bus/ap/apmask
/sys/bus/ap/aqmask
The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
- (APID). Each bit in the mask, from left to right (i.e., from most significant
- to least significant bit in big endian order), corresponds to an APID from
- 0-255. If a bit is set, the APID is marked as usable only by the default AP
- queue device drivers; otherwise, the APID is usable by the vfio_ap
- device driver.
+ (APID). Each bit in the mask, from left to right, corresponds to an APID from
+ 0-255. If a bit is set, the APID belongs to the subset of APQNs marked as
+ available only to the default AP queue device drivers.
The 'aqmask' is a 256-bit mask that identifies a set of AP queue indexes
- (APQI). Each bit in the mask, from left to right (i.e., from most significant
- to least significant bit in big endian order), corresponds to an APQI from
- 0-255. If a bit is set, the APQI is marked as usable only by the default AP
- queue device drivers; otherwise, the APQI is usable by the vfio_ap device
- driver.
+ (APQI). Each bit in the mask, from left to right, corresponds to an APQI from
+ 0-255. If a bit is set, the APQI belongs to the subset of APQNs marked as
+ available only to the default AP queue device drivers.
+
+ The Cartesian product of the APIDs corresponding to the bits set in the
+ apmask and the APQIs corresponding to the bits set in the aqmask comprise
+ the subset of APQNs that can be used only by the host default device drivers.
+ All other APQNs are available to the non-default device drivers such as the
+ vfio_ap driver.
+
+ Take, for example, the following masks::
+
+ apmask:
+ 0x7d00000000000000000000000000000000000000000000000000000000000000
- Take, for example, the following mask::
+ aqmask:
+ 0x8000000000000000000000000000000000000000000000000000000000000000
- 0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ The masks indicate:
- It indicates:
+ * Adapters 1, 2, 3, 4, 5, and 7 are available for use by the host default
+ device drivers.
- 1, 2, 3, 4, 5, and 7-255 belong to the default drivers' pool, and 0 and 6
- belong to the vfio_ap device driver's pool.
+ * Domain 0 is available for use by the host default device drivers
+
+ * The subset of APQNs available for use only by the default host device
+ drivers are:
+
+ (1,0), (2,0), (3,0), (4.0), (5,0) and (7,0)
+
+ * All other APQNs are available for use by the non-default device drivers.
The APQN of each AP queue device assigned to the linux host is checked by the
- AP bus against the set of APQNs derived from the cross product of APIDs
- and APQIs marked as usable only by the default AP queue device drivers. If a
+ AP bus against the set of APQNs derived from the Cartesian product of APIDs
+ and APQIs marked as available to the default AP queue device drivers. If a
match is detected, only the default AP queue device drivers will be probed;
otherwise, the vfio_ap device driver will be probed.
@@ -579,8 +635,7 @@ These are the steps:
0x4100000000000000000000000000000000000000000000000000000000000000
- Keep in mind that the mask reads from left to right (i.e., most
- significant to least significant bit in big endian order), so the mask
+ Keep in mind that the mask reads from left to right, so the mask
above identifies device numbers 1 and 7 (01000001).
If the string is longer than the mask, the operation is terminated with
@@ -626,11 +681,22 @@ These are the steps:
default drivers pool: adapter 0-15, domain 1
alternate drivers pool: adapter 16-255, domains 0, 2-255
+ **Note:**
+ Changing a mask such that one or more APQNs will be taken from a vfio_ap
+ mediated device (see below) will fail with an error (EBUSY). A message
+ is logged to the kernel ring buffer which can be viewed with the 'dmesg'
+ command. The output identifies each APQN flagged as 'in use' and identifies
+ the vfio_ap mediated device to which it is assigned; for example:
+
+ Userspace may not re-assign queue 05.0054 already assigned to 62177883-f1bb-47f0-914d-32a22e3a8804
+ Userspace may not re-assign queue 04.0054 already assigned to cef03c3c-903d-4ecc-9a83-40694cb8aee4
+
Securing the APQNs for our example
----------------------------------
To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
- APQNs can either be removed from the default masks::
+ APQNs can be removed from the default masks using either of the following
+ commands::
echo -5,-6 > /sys/bus/ap/apmask
@@ -683,7 +749,7 @@ Securing the APQNs for our example
/sys/devices/vfio_ap/matrix/
--- [mdev_supported_types]
- ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+ ------ [vfio_ap-passthrough] (passthrough vfio_ap mediated device type)
--------- create
--------- [devices]
@@ -734,6 +800,9 @@ Securing the APQNs for our example
----------------unassign_control_domain
----------------unassign_domain
+ Note *****: The vfio_ap mdevs do not persist across reboots unless the
+ mdevctl tool is used to create and persist them.
+
4. The administrator now needs to configure the matrixes for the mediated
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
@@ -755,6 +824,10 @@ Securing the APQNs for our example
cat matrix
+ To display the matrix that is or will be assigned to Guest1::
+
+ cat guest_matrix
+
This is how the matrix is configured for Guest2::
echo 5 > assign_adapter
@@ -774,17 +847,24 @@ Securing the APQNs for our example
higher than the maximum is specified, the operation will terminate with
an error (ENODEV).
- * All APQNs that can be derived from the adapter ID and the IDs of
- the previously assigned domains must be bound to the vfio_ap device
- driver. If no domains have yet been assigned, then there must be at least
- one APQN with the specified APID bound to the vfio_ap driver. If no such
- APQNs are bound to the driver, the operation will terminate with an
- error (EADDRNOTAVAIL).
+ Note: The maximum adapter number can be obtained via the sysfs
+ /sys/bus/ap/ap_max_adapter_id attribute file.
+
+ * Each APQN derived from the Cartesian product of the APID of the adapter
+ being assigned and the APQIs of the domains previously assigned:
+
+ - Must only be available to the vfio_ap device driver as specified in the
+ sysfs /sys/bus/ap/apmask and /sys/bus/ap/aqmask attribute files. If even
+ one APQN is reserved for use by the host device driver, the operation
+ will terminate with an error (EADDRNOTAVAIL).
+
+ - Must NOT be assigned to another vfio_ap mediated device. If even one APQN
+ is assigned to another vfio_ap mediated device, the operation will
+ terminate with an error (EBUSY).
- No APQN that can be derived from the adapter ID and the IDs of the
- previously assigned domains can be assigned to another mediated matrix
- device. If an APQN is assigned to another mediated matrix device, the
- operation will terminate with an error (EADDRINUSE).
+ - Must NOT be assigned while the sysfs /sys/bus/ap/apmask and
+ sys/bus/ap/aqmask attribute files are being edited or the operation may
+ terminate with an error (EBUSY).
In order to successfully assign a domain:
@@ -793,41 +873,50 @@ Securing the APQNs for our example
higher than the maximum is specified, the operation will terminate with
an error (ENODEV).
- * All APQNs that can be derived from the domain ID and the IDs of
- the previously assigned adapters must be bound to the vfio_ap device
- driver. If no domains have yet been assigned, then there must be at least
- one APQN with the specified APQI bound to the vfio_ap driver. If no such
- APQNs are bound to the driver, the operation will terminate with an
- error (EADDRNOTAVAIL).
+ Note: The maximum domain number can be obtained via the sysfs
+ /sys/bus/ap/ap_max_domain_id attribute file.
+
+ * Each APQN derived from the Cartesian product of the APQI of the domain
+ being assigned and the APIDs of the adapters previously assigned:
+
+ - Must only be available to the vfio_ap device driver as specified in the
+ sysfs /sys/bus/ap/apmask and /sys/bus/ap/aqmask attribute files. If even
+ one APQN is reserved for use by the host device driver, the operation
+ will terminate with an error (EADDRNOTAVAIL).
+
+ - Must NOT be assigned to another vfio_ap mediated device. If even one APQN
+ is assigned to another vfio_ap mediated device, the operation will
+ terminate with an error (EBUSY).
- No APQN that can be derived from the domain ID and the IDs of the
- previously assigned adapters can be assigned to another mediated matrix
- device. If an APQN is assigned to another mediated matrix device, the
- operation will terminate with an error (EADDRINUSE).
+ - Must NOT be assigned while the sysfs /sys/bus/ap/apmask and
+ sys/bus/ap/aqmask attribute files are being edited or the operation may
+ terminate with an error (EBUSY).
- In order to successfully assign a control domain, the domain number
- specified must represent a value from 0 up to the maximum domain number
- configured for the system. If a control domain number higher than the maximum
- is specified, the operation will terminate with an error (ENODEV).
+ In order to successfully assign a control domain:
+
+ * The domain number specified must represent a value from 0 up to the maximum
+ domain number configured for the system. If a control domain number higher
+ than the maximum is specified, the operation will terminate with an
+ error (ENODEV).
5. Start Guest1::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
7. Start Guest2::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
7. Start Guest3::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on,apqi=on \
-device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
-When the guest is shut down, the mediated matrix devices may be removed.
+When the guest is shut down, the vfio_ap mediated devices may be removed.
-Using our example again, to remove the mediated matrix device $uuid1::
+Using our example again, to remove the vfio_ap mediated device $uuid1::
/sys/devices/vfio_ap/matrix/
--- [mdev_supported_types]
@@ -840,26 +929,143 @@ Using our example again, to remove the mediated matrix device $uuid1::
echo 1 > remove
-This will remove all of the mdev matrix device's sysfs structures including
-the mdev device itself. To recreate and reconfigure the mdev matrix device,
+This will remove all of the matrix mdev device's sysfs structures including
+the mdev device itself. To recreate and reconfigure the matrix mdev device,
all of the steps starting with step 3 will have to be performed again. Note
-that the remove will fail if a guest using the mdev is still running.
+that the remove will fail if a guest using the vfio_ap mdev is still running.
-It is not necessary to remove an mdev matrix device, but one may want to
+It is not necessary to remove a vfio_ap mdev, but one may want to
remove it if no guest will use it during the remaining lifetime of the linux
-host. If the mdev matrix device is removed, one may want to also reconfigure
+host. If the vfio_ap mdev is removed, one may want to also reconfigure
the pool of adapters and queues reserved for use by the default drivers.
+Hot plug/unplug support:
+========================
+An adapter, domain or control domain may be hot plugged into a running KVM
+guest by assigning it to the vfio_ap mediated device being used by the guest if
+the following conditions are met:
+
+* The adapter, domain or control domain must also be assigned to the host's
+ AP configuration.
+
+* Each APQN derived from the Cartesian product comprised of the APID of the
+ adapter being assigned and the APQIs of the domains assigned must reference a
+ queue device bound to the vfio_ap device driver.
+
+* To hot plug a domain, each APQN derived from the Cartesian product
+ comprised of the APQI of the domain being assigned and the APIDs of the
+ adapters assigned must reference a queue device bound to the vfio_ap device
+ driver.
+
+An adapter, domain or control domain may be hot unplugged from a running KVM
+guest by unassigning it from the vfio_ap mediated device being used by the
+guest.
+
+Over-provisioning of AP queues for a KVM guest:
+===============================================
+Over-provisioning is defined herein as the assignment of adapters or domains to
+a vfio_ap mediated device that do not reference AP devices in the host's AP
+configuration. The idea here is that when the adapter or domain becomes
+available, it will be automatically hot-plugged into the KVM guest using
+the vfio_ap mediated device to which it is assigned as long as each new APQN
+resulting from plugging it in references a queue device bound to the vfio_ap
+device driver.
+
Limitations
===========
-* The KVM/kernel interfaces do not provide a way to prevent restoring an APQN
- to the default drivers pool of a queue that is still assigned to a mediated
- device in use by a guest. It is incumbent upon the administrator to
- ensure there is no mediated device in use by a guest to which the APQN is
- assigned lest the host be given access to the private data of the AP queue
- device such as a private key configured specifically for the guest.
+Live guest migration is not supported for guests using AP devices without
+intervention by a system administrator. Before a KVM guest can be migrated,
+the vfio_ap mediated device must be removed. Unfortunately, it can not be
+removed manually (i.e., echo 1 > /sys/devices/vfio_ap/matrix/$UUID/remove) while
+the mdev is in use by a KVM guest. If the guest is being emulated by QEMU,
+its mdev can be hot unplugged from the guest in one of two ways:
+
+1. If the KVM guest was started with libvirt, you can hot unplug the mdev via
+ the following commands:
+
+ virsh detach-device <guestname> <path-to-device-xml>
+
+ For example, to hot unplug mdev 62177883-f1bb-47f0-914d-32a22e3a8804 from
+ the guest named 'my-guest':
+
+ virsh detach-device my-guest ~/config/my-guest-hostdev.xml
+
+ The contents of my-guest-hostdev.xml:
+
+.. code-block:: xml
+
+ <hostdev mode='subsystem' type='mdev' managed='no' model='vfio-ap'>
+ <source>
+ <address uuid='62177883-f1bb-47f0-914d-32a22e3a8804'/>
+ </source>
+ </hostdev>
+
+
+ virsh qemu-monitor-command <guest-name> --hmp "device-del <device-id>"
+
+ For example, to hot unplug the vfio_ap mediated device identified on the
+ qemu command line with 'id=hostdev0' from the guest named 'my-guest':
+
+.. code-block:: sh
+
+ virsh qemu-monitor-command my-guest --hmp "device_del hostdev0"
+
+2. A vfio_ap mediated device can be hot unplugged by attaching the qemu monitor
+ to the guest and using the following qemu monitor command:
+
+ (QEMU) device-del id=<device-id>
+
+ For example, to hot unplug the vfio_ap mediated device that was specified
+ on the qemu command line with 'id=hostdev0' when the guest was started:
+
+ (QEMU) device-del id=hostdev0
+
+After live migration of the KVM guest completes, an AP configuration can be
+restored to the KVM guest by hot plugging a vfio_ap mediated device on the target
+system into the guest in one of two ways:
+
+1. If the KVM guest was started with libvirt, you can hot plug a matrix mediated
+ device into the guest via the following virsh commands:
+
+ virsh attach-device <guestname> <path-to-device-xml>
+
+ For example, to hot plug mdev 62177883-f1bb-47f0-914d-32a22e3a8804 into
+ the guest named 'my-guest':
+
+ virsh attach-device my-guest ~/config/my-guest-hostdev.xml
+
+ The contents of my-guest-hostdev.xml:
+
+.. code-block:: xml
+
+ <hostdev mode='subsystem' type='mdev' managed='no' model='vfio-ap'>
+ <source>
+ <address uuid='62177883-f1bb-47f0-914d-32a22e3a8804'/>
+ </source>
+ </hostdev>
+
+
+ virsh qemu-monitor-command <guest-name> --hmp \
+ "device_add vfio-ap,sysfsdev=<path-to-mdev>,id=<device-id>"
+
+ For example, to hot plug the vfio_ap mediated device
+ 62177883-f1bb-47f0-914d-32a22e3a8804 into the guest named 'my-guest' with
+ device-id hostdev0:
+
+ virsh qemu-monitor-command my-guest --hmp \
+ "device_add vfio-ap,\
+ sysfsdev=/sys/devices/vfio_ap/matrix/62177883-f1bb-47f0-914d-32a22e3a8804,\
+ id=hostdev0"
+
+2. A vfio_ap mediated device can be hot plugged by attaching the qemu monitor
+ to the guest and using the following qemu monitor command:
+
+ (qemu) device_add "vfio-ap,sysfsdev=<path-to-mdev>,id=<device-id>"
-* Dynamically modifying the AP matrix for a running guest (which would amount to
- hot(un)plug of AP devices for the guest) is currently not supported
+ For example, to plug the vfio_ap mediated device
+ 62177883-f1bb-47f0-914d-32a22e3a8804 into the guest with the device-id
+ hostdev0:
-* Live guest migration is not supported for guests using AP devices.
+ (QEMU) device-add "vfio-ap,\
+ sysfsdev=/sys/devices/vfio_ap/matrix/62177883-f1bb-47f0-914d-32a22e3a8804,\
+ id=hostdev0"
diff --git a/MAINTAINERS b/MAINTAINERS
index 106e70b80966..421f659ebef9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17808,7 +17808,7 @@ M: Jason Herne <jjherne@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
-F: Documentation/s390/vfio-ap.rst
+F: Documentation/s390/vfio-ap*
F: drivers/s390/crypto/vfio_ap*
S390 VFIO-CCW DRIVER
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 863e6bcaa5a1..bc48fe82d949 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -152,6 +152,7 @@ static void setup_kernel_memory_layout(void)
unsigned long vmemmap_start;
unsigned long rte_size;
unsigned long pages;
+ unsigned long vmax;
pages = ident_map_size / PAGE_SIZE;
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
@@ -163,10 +164,10 @@ static void setup_kernel_memory_layout(void)
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- MODULES_END = _REGION1_SIZE;
+ vmax = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- MODULES_END = _REGION2_SIZE;
+ vmax = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
@@ -174,11 +175,12 @@ static void setup_kernel_memory_layout(void)
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- adjust_to_uv_max(&MODULES_END);
+ vmax = adjust_to_uv_max(vmax);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
- MODULES_END = min(MODULES_END, KASAN_SHADOW_START);
+ vmax = min(vmax, KASAN_SHADOW_START);
#endif
+ MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index a5fa667160b2..0a077c0a2056 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -57,10 +57,11 @@ void uv_query_info(void)
}
#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax)
+unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
- *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+ limit = min_t(unsigned long, limit, uv_info.max_sec_stor_addr);
+ return limit;
}
static int is_prot_virt_host_capable(void)
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 690ce019af5a..0f3070856f8d 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -3,10 +3,13 @@
#define BOOT_UV_H
#if IS_ENABLED(CONFIG_KVM)
-void adjust_to_uv_max(unsigned long *vmax);
+unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
#else
-static inline void adjust_to_uv_max(unsigned long *vmax) {}
+static inline unsigned long adjust_to_uv_max(unsigned long limit)
+{
+ return limit;
+}
static inline void sanitize_prot_virt_host(void) {}
#endif
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 1023e9d43d44..526c3f40f6a2 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -1049,7 +1049,7 @@ out_err:
return ret;
}
-module_cpu_feature_match(MSA, aes_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, aes_s390_init);
module_exit(aes_s390_fini);
MODULE_ALIAS_CRYPTO("aes-all");
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index 2ec51f339cec..7752bd314558 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -121,7 +121,7 @@ static void __exit chacha_mod_fini(void)
crypto_unregister_skciphers(chacha_algs, ARRAY_SIZE(chacha_algs));
}
-module_cpu_feature_match(VXRS, chacha_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, chacha_mod_init);
module_exit(chacha_mod_fini);
MODULE_DESCRIPTION("ChaCha20 stream cipher");
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index fafecad20752..017143e9cef7 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -298,7 +298,7 @@ static void __exit crc_vx_mod_exit(void)
crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
}
-module_cpu_feature_match(VXRS, crc_vx_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
module_exit(crc_vx_mod_exit);
MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index e013088b5115..8e75b83a5ddc 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -492,7 +492,7 @@ out_err:
return ret;
}
-module_cpu_feature_match(MSA, des_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, des_s390_init);
module_exit(des_s390_exit);
MODULE_ALIAS_CRYPTO("des");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 6b07a2f1ce8a..0800a2a5799f 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -145,7 +145,7 @@ static void __exit ghash_mod_exit(void)
crypto_unregister_shash(&ghash_alg);
}
-module_cpu_feature_match(MSA, ghash_mod_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, ghash_mod_init);
module_exit(ghash_mod_exit);
MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index ae382bafc772..a077087bc6cc 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -907,5 +907,5 @@ static void __exit prng_exit(void)
}
}
-module_cpu_feature_match(MSA, prng_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, prng_init);
module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index a3fabf310a38..bc3a22704e09 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -95,7 +95,7 @@ static void __exit sha1_s390_fini(void)
crypto_unregister_shash(&alg);
}
-module_cpu_feature_match(MSA, sha1_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha1_s390_init);
module_exit(sha1_s390_fini);
MODULE_ALIAS_CRYPTO("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 24983f175676..6f1ccdf93d3e 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -134,7 +134,7 @@ static void __exit sha256_s390_fini(void)
crypto_unregister_shash(&sha256_alg);
}
-module_cpu_feature_match(MSA, sha256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha256_s390_init);
module_exit(sha256_s390_fini);
MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c
index 30ac49b635bf..e1350e033a32 100644
--- a/arch/s390/crypto/sha3_256_s390.c
+++ b/arch/s390/crypto/sha3_256_s390.c
@@ -137,7 +137,7 @@ static void __exit sha3_256_s390_fini(void)
crypto_unregister_shash(&sha3_256_alg);
}
-module_cpu_feature_match(MSA, sha3_256_s390_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, sha3_256_s390_init);
module_exit(sha3_256_s390_fini);
MODULE_ALIAS_CRYPTO("sha3-256");
diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c
index e70d50f7620f..06c142ed9bb1 100644
--- a/arch/s390/crypto/sha3_512_s390.c
+++ b/arch/s390/crypto/sha3_512_s390.c
@@ -147,7 +147,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha3_384_alg);
}
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 43ce4956df73..04f11c407763 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -142,7 +142,7 @@ static void __exit fini(void)
crypto_unregister_shash(&sha384_alg);
}
-module_cpu_feature_match(MSA, init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, init);
module_exit(fini);
MODULE_LICENSE("GPL");
diff --git a/arch/s390/include/asm/cpufeature.h b/arch/s390/include/asm/cpufeature.h
index 14cfd48d598e..931204613753 100644
--- a/arch/s390/include/asm/cpufeature.h
+++ b/arch/s390/include/asm/cpufeature.h
@@ -2,28 +2,21 @@
/*
* Module interface for CPU features
*
- * Copyright IBM Corp. 2015
+ * Copyright IBM Corp. 2015, 2022
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
*/
#ifndef __ASM_S390_CPUFEATURE_H
#define __ASM_S390_CPUFEATURE_H
-#include <asm/elf.h>
+enum {
+ S390_CPU_FEATURE_MSA,
+ S390_CPU_FEATURE_VXRS,
+ S390_CPU_FEATURE_UV,
+ MAX_CPU_FEATURES
+};
-/* Hardware features on Linux on z Systems are indicated by facility bits that
- * are mapped to the so-called machine flags. Particular machine flags are
- * then used to define ELF hardware capabilities; most notably hardware flags
- * that are essential for user space / glibc.
- *
- * Restrict the set of exposed CPU features to ELF hardware capabilities for
- * now. Additional machine flags can be indicated by values larger than
- * MAX_ELF_HWCAP_FEATURES.
- */
-#define MAX_ELF_HWCAP_FEATURES (8 * sizeof(elf_hwcap))
-#define MAX_CPU_FEATURES MAX_ELF_HWCAP_FEATURES
-
-#define cpu_feature(feat) ilog2(HWCAP_ ## feat)
+#define cpu_feature(feature) (feature)
int cpu_have_feature(unsigned int nr);
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 1572b3634cdd..829d68e2c685 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -42,18 +42,4 @@ typedef struct {
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
-static inline int tprot(unsigned long addr)
-{
- int rc = -EFAULT;
-
- asm volatile(
- " tprot 0(%1),0\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+d" (rc) : "a" (addr) : "cc");
- return rc;
-}
-
#endif
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 147a8d547ef9..85248d8fee0c 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -8,6 +8,8 @@
#ifndef _ASM_S390_OS_INFO_H
#define _ASM_S390_OS_INFO_H
+#include <linux/uio.h>
+
#define OS_INFO_VERSION_MAJOR 1
#define OS_INFO_VERSION_MINOR 1
#define OS_INFO_MAGIC 0x4f53494e464f535aULL /* OSINFOSZ */
@@ -39,7 +41,20 @@ u32 os_info_csum(struct os_info *os_info);
#ifdef CONFIG_CRASH_DUMP
void *os_info_old_entry(int nr, unsigned long *size);
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count);
+
+static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+{
+ struct iov_iter iter;
+ struct kvec kvec;
+
+ kvec.iov_base = dst;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (copy_oldmem_iter(&iter, src, count) < count)
+ return -EFAULT;
+ return 0;
+}
#else
static inline void *os_info_old_entry(int nr, unsigned long *size)
{
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index addefe8ccdba..9d4c7f71e070 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -17,6 +17,7 @@
#define EXT_SCCB_READ_CPU (3 * PAGE_SIZE)
#ifndef __ASSEMBLY__
+#include <linux/uio.h>
#include <asm/chpid.h>
#include <asm/cpu.h>
@@ -146,8 +147,7 @@ int sclp_pci_deconfigure(u32 fid);
int sclp_ap_configure(u32 apid);
int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
-int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
+size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count);
void sclp_ocf_cpc_name_copy(char *dst);
static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index c2c9995466e0..f7038b800cc3 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -285,7 +285,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return __clear_user(to, n);
}
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count);
void *s390_kernel_write(void *dst, const void *src, size_t size);
int __noreturn __put_kernel_bad(void);
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 0bf06f1682d8..02462e7100c1 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -47,7 +47,7 @@ struct unwind_state {
static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
unsigned long ip)
{
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
if (is_kretprobe_trampoline(ip))
ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
return ip;
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 27d6b3c7aa06..3cbfa9fddd9a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -35,7 +35,7 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
obj-y := traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c
new file mode 100644
index 000000000000..1b2ae42a0c15
--- /dev/null
+++ b/arch/s390/kernel/cpufeature.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2022
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/bug.h>
+#include <asm/elf.h>
+
+enum {
+ TYPE_HWCAP,
+ TYPE_FACILITY,
+};
+
+struct s390_cpu_feature {
+ unsigned int type : 4;
+ unsigned int num : 28;
+};
+
+static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = {
+ [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA},
+ [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS},
+ [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158},
+};
+
+/*
+ * cpu_have_feature - Test CPU features on module initialization
+ */
+int cpu_have_feature(unsigned int num)
+{
+ struct s390_cpu_feature *feature;
+
+ if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES))
+ return 0;
+ feature = &s390_cpu_features[num];
+ switch (feature->type) {
+ case TYPE_HWCAP:
+ return !!(elf_hwcap & BIT(feature->num));
+ case TYPE_FACILITY:
+ return test_facility(feature->num);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+EXPORT_SYMBOL(cpu_have_feature);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 28124d0fa1d5..bad8f47fc5d6 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -53,6 +53,8 @@ struct save_area {
};
static LIST_HEAD(dump_save_areas);
+static DEFINE_MUTEX(memcpy_real_mutex);
+static char memcpy_real_buf[PAGE_SIZE];
/*
* Allocate a save area
@@ -63,7 +65,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
sa = memblock_alloc(sizeof(*sa), 8);
if (!sa)
- panic("Failed to allocate save area\n");
+ return NULL;
if (is_boot_cpu)
list_add(&sa->list, &dump_save_areas);
@@ -114,80 +116,35 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
}
-/*
- * Return physical address for virtual address
- */
-static inline void *load_real_addr(void *addr)
-{
- unsigned long real_addr;
-
- asm volatile(
- " lra %0,0(%1)\n"
- " jz 0f\n"
- " la %0,0\n"
- "0:"
- : "=a" (real_addr) : "a" (addr) : "cc");
- return (void *)real_addr;
-}
-
-/*
- * Copy memory of the old, dumped system to a kernel space virtual address
- */
-int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long len;
- void *ra;
- int rc;
+ size_t len, copied, res = 0;
+ mutex_lock(&memcpy_real_mutex);
while (count) {
- if (!oldmem_data.start && src < sclp.hsa_size) {
- /* Copy from zfcp/nvme dump HSA area */
- len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_kernel(dst, src, len);
- if (rc)
- return rc;
- } else {
- /* Check for swapped kdump oldmem areas */
- if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
- src -= oldmem_data.start;
- len = min(count, oldmem_data.size - src);
- } else if (oldmem_data.start && src < oldmem_data.size) {
- len = min(count, oldmem_data.size - src);
- src += oldmem_data.start;
- } else {
- len = count;
- }
- if (is_vmalloc_or_module_addr(dst)) {
- ra = load_real_addr(dst);
- len = min(PAGE_SIZE - offset_in_page(ra), len);
- } else {
- ra = dst;
- }
- if (memcpy_real(ra, src, len))
- return -EFAULT;
- }
- dst += len;
- src += len;
- count -= len;
+ len = min(PAGE_SIZE, count);
+ if (memcpy_real(memcpy_real_buf, src, len))
+ break;
+ copied = copy_to_iter(memcpy_real_buf, len, iter);
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- return 0;
+ mutex_unlock(&memcpy_real_mutex);
+ return res;
}
-/*
- * Copy memory of the old, dumped system to a user space virtual address
- */
-static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
+size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long len;
- int rc;
+ size_t len, copied, res = 0;
while (count) {
if (!oldmem_data.start && src < sclp.hsa_size) {
/* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - src);
- rc = memcpy_hsa_user(dst, src, len);
- if (rc)
- return rc;
+ copied = memcpy_hsa_iter(iter, src, len);
} else {
/* Check for swapped kdump oldmem areas */
if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) {
@@ -199,15 +156,15 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count)
} else {
len = count;
}
- rc = copy_to_user_real(dst, src, count);
- if (rc)
- return rc;
+ copied = copy_to_iter_real(iter, src, len);
}
- dst += len;
- src += len;
- count -= len;
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < len)
+ break;
}
- return 0;
+ return res;
}
/*
@@ -217,26 +174,9 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize,
unsigned long offset)
{
unsigned long src;
- int rc;
- if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
- return -EINVAL;
- /* Multi-segment iterators are not supported */
- if (iter->nr_segs > 1)
- return -EINVAL;
- if (!csize)
- return 0;
src = pfn_to_phys(pfn) + offset;
-
- /* XXX: pass the iov_iter down to a common function */
- if (iter_is_iovec(iter))
- rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
- else
- rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
- if (rc < 0)
- return rc;
- iov_iter_advance(iter, csize);
- return csize;
+ return copy_oldmem_iter(iter, src, csize);
}
/*
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 53ed3884fe64..60ac66aab163 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -11,6 +11,7 @@
#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/entry-common.h>
#include <linux/hardirq.h>
#include <linux/log2.h>
#include <linux/kprobes.h>
@@ -397,11 +398,12 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
static unsigned long long last_ipd;
struct mcck_struct *mcck;
unsigned long long tmp;
+ irqentry_state_t irq_state;
union mci mci;
unsigned long mcck_dam_code;
int mcck_pending = 0;
- nmi_enter();
+ irq_state = irqentry_nmi_enter(regs);
if (user_mode(regs))
update_timer_mcck();
@@ -504,14 +506,14 @@ int notrace s390_do_machine_check(struct pt_regs *regs)
clear_cpu_flag(CIF_MCCK_GUEST);
if (user_mode(regs) && mcck_pending) {
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
return 1;
}
if (mcck_pending)
schedule_mcck_handler();
- nmi_exit();
+ irqentry_nmi_exit(regs, irq_state);
return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index aa0e0e7fc773..a194611ba88c 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -8,7 +8,6 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/stop_machine.h>
-#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/random.h>
@@ -96,15 +95,6 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, current);
}
-/*
- * cpu_have_feature - Test CPU features on module initialization
- */
-int cpu_have_feature(unsigned int num)
-{
- return elf_hwcap & (1UL << num);
-}
-EXPORT_SYMBOL(cpu_have_feature);
-
static void show_facilities(struct seq_file *m)
{
unsigned int bit;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ebad41afe355..ed4fbbbdd1b0 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -474,19 +474,18 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;
- mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- if (!mcck_stack)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, THREAD_SIZE, THREAD_SIZE);
- lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
-
- /* Setup absolute zero lowcore */
put_abs_lowcore(restart_stack, lc->restart_stack);
put_abs_lowcore(restart_fn, lc->restart_fn);
put_abs_lowcore(restart_data, lc->restart_data);
put_abs_lowcore(restart_source, lc->restart_source);
put_abs_lowcore(restart_psw, lc->restart_psw);
+ mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+ if (!mcck_stack)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, THREAD_SIZE, THREAD_SIZE);
+ lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
+
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 421efa46946b..d6d84e02f35a 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -172,32 +172,6 @@ void memcpy_absolute(void *dest, void *src, size_t count)
}
/*
- * Copy memory from kernel (real) to user (virtual)
- */
-int copy_to_user_real(void __user *dest, unsigned long src, unsigned long count)
-{
- int offs = 0, size, rc;
- char *buf;
-
- buf = (char *) __get_free_page(GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = -EFAULT;
- while (offs < count) {
- size = min(PAGE_SIZE, count - offs);
- if (memcpy_real(buf, src + offs, size))
- goto out;
- if (copy_to_user(dest + offs, buf, size))
- goto out;
- offs += size;
- }
- rc = 0;
-out:
- free_page((unsigned long) buf);
- return rc;
-}
-
-/*
* Check if physical address is within prefix or zero page
*/
static int is_swapped(phys_addr_t addr)
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
index 488808dc17a2..795853dfc46b 100644
--- a/drivers/char/hw_random/s390-trng.c
+++ b/drivers/char/hw_random/s390-trng.c
@@ -252,5 +252,5 @@ static void __exit trng_exit(void)
trng_debug_exit();
}
-module_cpu_feature_match(MSA, trng_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, trng_init);
module_exit(trng_exit);
diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig
index 57f41efb8043..7d1749b0d378 100644
--- a/drivers/s390/char/Kconfig
+++ b/drivers/s390/char/Kconfig
@@ -89,7 +89,7 @@ config HMC_DRV
Management Console (HMC) drive CD/DVD-ROM. It is available as a
module, called 'hmcdrv', and also as kernel built-in. There is one
optional parameter for this module: cachesize=N, which modifies the
- transfer cache size from it's default value 0.5MB to N bytes. If N
+ transfer cache size from its default value 0.5MB to N bytes. If N
is zero, then no caching is performed.
config SCLP_OFB
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index 38cc1565d6ae..751945fb6793 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -548,7 +548,7 @@ tape_34xx_unit_check(struct tape_device *device, struct tape_request *request,
case 0x2e:
/*
* Not capable. This indicates either that the drive fails
- * reading the format id mark or that that format specified
+ * reading the format id mark or that format specified
* is not supported by the drive.
*/
dev_warn (&device->cdev->dev, "The tape unit cannot process "
diff --git a/drivers/s390/char/uvdevice.c b/drivers/s390/char/uvdevice.c
index 66505d7166a6..1d40457c7b10 100644
--- a/drivers/s390/char/uvdevice.c
+++ b/drivers/s390/char/uvdevice.c
@@ -27,6 +27,7 @@
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
+#include <linux/cpufeature.h>
#include <asm/uvdevice.h>
#include <asm/uv.h>
@@ -244,12 +245,10 @@ static void __exit uvio_dev_exit(void)
static int __init uvio_dev_init(void)
{
- if (!test_facility(158))
- return -ENXIO;
return misc_register(&uvio_dev_miscdev);
}
-module_init(uvio_dev_init);
+module_cpu_feature_match(S390_CPU_FEATURE_UV, uvio_dev_init);
module_exit(uvio_dev_exit);
MODULE_AUTHOR("IBM Corporation");
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 516783ba950f..f6da215ccf9f 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/panic_notifier.h>
#include <linux/reboot.h>
+#include <linux/uio.h>
#include <asm/asm-offsets.h>
#include <asm/ipl.h>
@@ -50,36 +51,41 @@ static struct dentry *zcore_reipl_file;
static struct dentry *zcore_hsa_file;
static struct ipl_parameter_block *zcore_ipl_block;
+static DEFINE_MUTEX(hsa_buf_mutex);
static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE);
/*
- * Copy memory from HSA to user memory (not reentrant):
+ * Copy memory from HSA to iterator (not reentrant):
*
- * @dest: User buffer where memory should be copied to
+ * @iter: Iterator where memory should be copied to
* @src: Start address within HSA where data should be copied
* @count: Size of buffer, which should be copied
*/
-int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
+size_t memcpy_hsa_iter(struct iov_iter *iter, unsigned long src, size_t count)
{
- unsigned long offset, bytes;
+ size_t bytes, copied, res = 0;
+ unsigned long offset;
if (!hsa_available)
- return -ENODATA;
+ return 0;
+ mutex_lock(&hsa_buf_mutex);
while (count) {
if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
TRACE("sclp_sdias_copy() failed\n");
- return -EIO;
+ break;
}
offset = src % PAGE_SIZE;
bytes = min(PAGE_SIZE - offset, count);
- if (copy_to_user(dest, hsa_buf + offset, bytes))
- return -EFAULT;
- src += bytes;
- dest += bytes;
- count -= bytes;
+ copied = copy_to_iter(hsa_buf + offset, bytes, iter);
+ count -= copied;
+ src += copied;
+ res += copied;
+ if (copied < bytes)
+ break;
}
- return 0;
+ mutex_unlock(&hsa_buf_mutex);
+ return res;
}
/*
@@ -89,25 +95,16 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count)
* @src: Start address within HSA where data should be copied
* @count: Size of buffer, which should be copied
*/
-int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count)
+static inline int memcpy_hsa_kernel(void *dst, unsigned long src, size_t count)
{
- unsigned long offset, bytes;
+ struct iov_iter iter;
+ struct kvec kvec;
- if (!hsa_available)
- return -ENODATA;
-
- while (count) {
- if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) {
- TRACE("sclp_sdias_copy() failed\n");
- return -EIO;
- }
- offset = src % PAGE_SIZE;
- bytes = min(PAGE_SIZE - offset, count);
- memcpy(dest, hsa_buf + offset, bytes);
- src += bytes;
- dest += bytes;
- count -= bytes;
- }
+ kvec.iov_base = dst;
+ kvec.iov_len = count;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+ if (memcpy_hsa_iter(&iter, src, count) < count)
+ return -EIO;
return 0;
}
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 95cfdeab5baf..8f1d1cf23d44 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -838,6 +838,17 @@ static void ap_bus_revise_bindings(void)
bus_for_each_dev(&ap_bus_type, NULL, NULL, __ap_revise_reserved);
}
+/**
+ * ap_owned_by_def_drv: indicates whether an AP adapter is reserved for the
+ * default host driver or not.
+ * @card: the APID of the adapter card to check
+ * @queue: the APQI of the queue to check
+ *
+ * Note: the ap_perms_mutex must be locked by the caller of this function.
+ *
+ * Return: an int specifying whether the AP adapter is reserved for the host (1)
+ * or not (0).
+ */
int ap_owned_by_def_drv(int card, int queue)
{
int rc = 0;
@@ -845,25 +856,31 @@ int ap_owned_by_def_drv(int card, int queue)
if (card < 0 || card >= AP_DEVICES || queue < 0 || queue >= AP_DOMAINS)
return -EINVAL;
- mutex_lock(&ap_perms_mutex);
-
if (test_bit_inv(card, ap_perms.apm) &&
test_bit_inv(queue, ap_perms.aqm))
rc = 1;
- mutex_unlock(&ap_perms_mutex);
-
return rc;
}
EXPORT_SYMBOL(ap_owned_by_def_drv);
+/**
+ * ap_apqn_in_matrix_owned_by_def_drv: indicates whether every APQN contained in
+ * a set is reserved for the host drivers
+ * or not.
+ * @apm: a bitmap specifying a set of APIDs comprising the APQNs to check
+ * @aqm: a bitmap specifying a set of APQIs comprising the APQNs to check
+ *
+ * Note: the ap_perms_mutex must be locked by the caller of this function.
+ *
+ * Return: an int specifying whether each APQN is reserved for the host (1) or
+ * not (0)
+ */
int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm,
unsigned long *aqm)
{
int card, queue, rc = 0;
- mutex_lock(&ap_perms_mutex);
-
for (card = 0; !rc && card < AP_DEVICES; card++)
if (test_bit_inv(card, apm) &&
test_bit_inv(card, ap_perms.apm))
@@ -872,8 +889,6 @@ int ap_apqn_in_matrix_owned_by_def_drv(unsigned long *apm,
test_bit_inv(queue, ap_perms.aqm))
rc = 1;
- mutex_unlock(&ap_perms_mutex);
-
return rc;
}
EXPORT_SYMBOL(ap_apqn_in_matrix_owned_by_def_drv);
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 7329caa7d467..5a05d1cdfec2 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -2115,5 +2115,5 @@ static void __exit pkey_exit(void)
pkey_debug_exit();
}
-module_cpu_feature_match(MSA, pkey_init);
+module_cpu_feature_match(S390_CPU_FEATURE_MSA, pkey_init);
module_exit(pkey_exit);
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index 4ac9c6521ec1..f43cfeabd2cc 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -18,9 +18,6 @@
#define VFIO_AP_ROOT_NAME "vfio_ap"
#define VFIO_AP_DEV_NAME "matrix"
-#define AP_QUEUE_ASSIGNED "assigned"
-#define AP_QUEUE_UNASSIGNED "unassigned"
-#define AP_QUEUE_IN_USE "in use"
MODULE_AUTHOR("IBM Corporation");
MODULE_DESCRIPTION("VFIO AP device driver, Copyright IBM Corp. 2018");
@@ -46,120 +43,12 @@ static struct ap_device_id ap_queue_ids[] = {
{ /* end of sibling */ },
};
-static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q)
-{
- struct ap_matrix_mdev *matrix_mdev;
- unsigned long apid = AP_QID_CARD(q->apqn);
- unsigned long apqi = AP_QID_QUEUE(q->apqn);
-
- list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
- if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
- test_bit_inv(apqi, matrix_mdev->matrix.aqm))
- return matrix_mdev;
- }
-
- return NULL;
-}
-
-static ssize_t status_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- ssize_t nchars = 0;
- struct vfio_ap_queue *q;
- struct ap_matrix_mdev *matrix_mdev;
- struct ap_device *apdev = to_ap_dev(dev);
-
- mutex_lock(&matrix_dev->lock);
- q = dev_get_drvdata(&apdev->device);
- matrix_mdev = vfio_ap_mdev_for_queue(q);
-
- if (matrix_mdev) {
- if (matrix_mdev->kvm)
- nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
- AP_QUEUE_IN_USE);
- else
- nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
- AP_QUEUE_ASSIGNED);
- } else {
- nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
- AP_QUEUE_UNASSIGNED);
- }
-
- mutex_unlock(&matrix_dev->lock);
-
- return nchars;
-}
-
-static DEVICE_ATTR_RO(status);
-
-static struct attribute *vfio_queue_attrs[] = {
- &dev_attr_status.attr,
- NULL,
-};
-
-static const struct attribute_group vfio_queue_attr_group = {
- .attrs = vfio_queue_attrs,
-};
-
-/**
- * vfio_ap_queue_dev_probe: Allocate a vfio_ap_queue structure and associate it
- * with the device as driver_data.
- *
- * @apdev: the AP device being probed
- *
- * Return: returns 0 if the probe succeeded; otherwise, returns an error if
- * storage could not be allocated for a vfio_ap_queue object or the
- * sysfs 'status' attribute could not be created for the queue device.
- */
-static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
-{
- int ret;
- struct vfio_ap_queue *q;
-
- q = kzalloc(sizeof(*q), GFP_KERNEL);
- if (!q)
- return -ENOMEM;
-
- mutex_lock(&matrix_dev->lock);
- dev_set_drvdata(&apdev->device, q);
- q->apqn = to_ap_queue(&apdev->device)->qid;
- q->saved_isc = VFIO_AP_ISC_INVALID;
-
- ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
- if (ret) {
- dev_set_drvdata(&apdev->device, NULL);
- kfree(q);
- }
-
- mutex_unlock(&matrix_dev->lock);
-
- return ret;
-}
-
-/**
- * vfio_ap_queue_dev_remove: Free the associated vfio_ap_queue structure.
- *
- * @apdev: the AP device being removed
- *
- * Takes the matrix lock to avoid actions on this device while doing the remove.
- */
-static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
-{
- struct vfio_ap_queue *q;
-
- mutex_lock(&matrix_dev->lock);
- sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
- q = dev_get_drvdata(&apdev->device);
- vfio_ap_mdev_reset_queue(q, 1);
- dev_set_drvdata(&apdev->device, NULL);
- kfree(q);
- mutex_unlock(&matrix_dev->lock);
-}
-
static struct ap_driver vfio_ap_drv = {
- .probe = vfio_ap_queue_dev_probe,
- .remove = vfio_ap_queue_dev_remove,
+ .probe = vfio_ap_mdev_probe_queue,
+ .remove = vfio_ap_mdev_remove_queue,
+ .in_use = vfio_ap_mdev_resource_in_use,
+ .on_config_changed = vfio_ap_on_cfg_changed,
+ .on_scan_complete = vfio_ap_on_scan_complete,
.ids = ap_queue_ids,
};
@@ -212,8 +101,9 @@ static int vfio_ap_matrix_dev_create(void)
goto matrix_alloc_err;
}
- mutex_init(&matrix_dev->lock);
+ mutex_init(&matrix_dev->mdevs_lock);
INIT_LIST_HEAD(&matrix_dev->mdev_list);
+ mutex_init(&matrix_dev->guests_lock);
dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME);
matrix_dev->device.parent = root_device;
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 75cd92c291e3..6c8c41fac4e1 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -26,44 +26,193 @@
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
-static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
+#define AP_QUEUE_ASSIGNED "assigned"
+#define AP_QUEUE_UNASSIGNED "unassigned"
+#define AP_QUEUE_IN_USE "in use"
+
+static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
+static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, unsigned int retry);
-static int match_apqn(struct device *dev, const void *data)
+/**
+ * get_update_locks_for_kvm: Acquire the locks required to dynamically update a
+ * KVM guest's APCB in the proper order.
+ *
+ * @kvm: a pointer to a struct kvm object containing the KVM guest's APCB.
+ *
+ * The proper locking order is:
+ * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM
+ * guest's APCB.
+ * 2. kvm->lock: required to update a guest's APCB
+ * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev
+ *
+ * Note: If @kvm is NULL, the KVM lock will not be taken.
+ */
+static inline void get_update_locks_for_kvm(struct kvm *kvm)
{
- struct vfio_ap_queue *q = dev_get_drvdata(dev);
+ mutex_lock(&matrix_dev->guests_lock);
+ if (kvm)
+ mutex_lock(&kvm->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+}
- return (q->apqn == *(int *)(data)) ? 1 : 0;
+/**
+ * release_update_locks_for_kvm: Release the locks used to dynamically update a
+ * KVM guest's APCB in the proper order.
+ *
+ * @kvm: a pointer to a struct kvm object containing the KVM guest's APCB.
+ *
+ * The proper unlocking order is:
+ * 1. matrix_dev->mdevs_lock
+ * 2. kvm->lock
+ * 3. matrix_dev->guests_lock
+ *
+ * Note: If @kvm is NULL, the KVM lock will not be released.
+ */
+static inline void release_update_locks_for_kvm(struct kvm *kvm)
+{
+ mutex_unlock(&matrix_dev->mdevs_lock);
+ if (kvm)
+ mutex_unlock(&kvm->lock);
+ mutex_unlock(&matrix_dev->guests_lock);
}
/**
- * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
- * @matrix_mdev: the associated mediated matrix
- * @apqn: The queue APQN
+ * get_update_locks_for_mdev: Acquire the locks required to dynamically update a
+ * KVM guest's APCB in the proper order.
+ *
+ * @matrix_mdev: a pointer to a struct ap_matrix_mdev object containing the AP
+ * configuration data to use to update a KVM guest's APCB.
*
- * Retrieve a queue with a specific APQN from the list of the
- * devices of the vfio_ap_drv.
- * Verify that the APID and the APQI are set in the matrix.
+ * The proper locking order is:
+ * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM
+ * guest's APCB.
+ * 2. matrix_mdev->kvm->lock: required to update a guest's APCB
+ * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev
*
- * Return: the pointer to the associated vfio_ap_queue
+ * Note: If @matrix_mdev is NULL or is not attached to a KVM guest, the KVM
+ * lock will not be taken.
*/
-static struct vfio_ap_queue *vfio_ap_get_queue(
+static inline void get_update_locks_for_mdev(struct ap_matrix_mdev *matrix_mdev)
+{
+ mutex_lock(&matrix_dev->guests_lock);
+ if (matrix_mdev && matrix_mdev->kvm)
+ mutex_lock(&matrix_mdev->kvm->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+}
+
+/**
+ * release_update_locks_for_mdev: Release the locks used to dynamically update a
+ * KVM guest's APCB in the proper order.
+ *
+ * @matrix_mdev: a pointer to a struct ap_matrix_mdev object containing the AP
+ * configuration data to use to update a KVM guest's APCB.
+ *
+ * The proper unlocking order is:
+ * 1. matrix_dev->mdevs_lock
+ * 2. matrix_mdev->kvm->lock
+ * 3. matrix_dev->guests_lock
+ *
+ * Note: If @matrix_mdev is NULL or is not attached to a KVM guest, the KVM
+ * lock will not be released.
+ */
+static inline void release_update_locks_for_mdev(struct ap_matrix_mdev *matrix_mdev)
+{
+ mutex_unlock(&matrix_dev->mdevs_lock);
+ if (matrix_mdev && matrix_mdev->kvm)
+ mutex_unlock(&matrix_mdev->kvm->lock);
+ mutex_unlock(&matrix_dev->guests_lock);
+}
+
+/**
+ * get_update_locks_by_apqn: Find the mdev to which an APQN is assigned and
+ * acquire the locks required to update the APCB of
+ * the KVM guest to which the mdev is attached.
+ *
+ * @apqn: the APQN of a queue device.
+ *
+ * The proper locking order is:
+ * 1. matrix_dev->guests_lock: required to use the KVM pointer to update a KVM
+ * guest's APCB.
+ * 2. matrix_mdev->kvm->lock: required to update a guest's APCB
+ * 3. matrix_dev->mdevs_lock: required to access data stored in a matrix_mdev
+ *
+ * Note: If @apqn is not assigned to a matrix_mdev, the matrix_mdev->kvm->lock
+ * will not be taken.
+ *
+ * Return: the ap_matrix_mdev object to which @apqn is assigned or NULL if @apqn
+ * is not assigned to an ap_matrix_mdev.
+ */
+static struct ap_matrix_mdev *get_update_locks_by_apqn(int apqn)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+
+ mutex_lock(&matrix_dev->guests_lock);
+
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ if (test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm) &&
+ test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) {
+ if (matrix_mdev->kvm)
+ mutex_lock(&matrix_mdev->kvm->lock);
+
+ mutex_lock(&matrix_dev->mdevs_lock);
+
+ return matrix_mdev;
+ }
+ }
+
+ mutex_lock(&matrix_dev->mdevs_lock);
+
+ return NULL;
+}
+
+/**
+ * get_update_locks_for_queue: get the locks required to update the APCB of the
+ * KVM guest to which the matrix mdev linked to a
+ * vfio_ap_queue object is attached.
+ *
+ * @q: a pointer to a vfio_ap_queue object.
+ *
+ * The proper locking order is:
+ * 1. q->matrix_dev->guests_lock: required to use the KVM pointer to update a
+ * KVM guest's APCB.
+ * 2. q->matrix_mdev->kvm->lock: required to update a guest's APCB
+ * 3. matrix_dev->mdevs_lock: required to access data stored in matrix_mdev
+ *
+ * Note: if @queue is not linked to an ap_matrix_mdev object, the KVM lock
+ * will not be taken.
+ */
+static inline void get_update_locks_for_queue(struct vfio_ap_queue *q)
+{
+ mutex_lock(&matrix_dev->guests_lock);
+ if (q->matrix_mdev && q->matrix_mdev->kvm)
+ mutex_lock(&q->matrix_mdev->kvm->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+}
+
+/**
+ * vfio_ap_mdev_get_queue - retrieve a queue with a specific APQN from a
+ * hash table of queues assigned to a matrix mdev
+ * @matrix_mdev: the matrix mdev
+ * @apqn: The APQN of a queue device
+ *
+ * Return: the pointer to the vfio_ap_queue struct representing the queue or
+ * NULL if the queue is not assigned to @matrix_mdev
+ */
+static struct vfio_ap_queue *vfio_ap_mdev_get_queue(
struct ap_matrix_mdev *matrix_mdev,
int apqn)
{
struct vfio_ap_queue *q;
- if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
- return NULL;
- if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
- return NULL;
-
- q = vfio_ap_find_queue(apqn);
- if (q)
- q->matrix_mdev = matrix_mdev;
+ hash_for_each_possible(matrix_mdev->qtable.queues, q, mdev_qnode,
+ apqn) {
+ if (q && q->apqn == apqn)
+ return q;
+ }
- return q;
+ return NULL;
}
/**
@@ -180,7 +329,6 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
status.response_code);
end_free:
vfio_ap_free_aqic_resources(q);
- q->matrix_mdev = NULL;
return status;
}
@@ -399,10 +547,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
- mutex_lock(&matrix_dev->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+
if (!vcpu->kvm->arch.crypto.pqap_hook) {
VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
__func__, apqn);
+
goto out_unlock;
}
@@ -418,7 +568,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
goto out_unlock;
}
- q = vfio_ap_get_queue(matrix_mdev, apqn);
+ q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q) {
VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
__func__, AP_QID_CARD(apqn),
@@ -437,7 +587,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
out_unlock:
memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
vcpu->run->s.regs.gprs[1] >>= 32;
- mutex_unlock(&matrix_dev->lock);
+ mutex_unlock(&matrix_dev->mdevs_lock);
return 0;
}
@@ -449,6 +599,91 @@ static void vfio_ap_matrix_init(struct ap_config_info *info,
matrix->adm_max = info->apxa ? info->Nd : 15;
}
+static void vfio_ap_mdev_update_guest_apcb(struct ap_matrix_mdev *matrix_mdev)
+{
+ if (matrix_mdev->kvm)
+ kvm_arch_crypto_set_masks(matrix_mdev->kvm,
+ matrix_mdev->shadow_apcb.apm,
+ matrix_mdev->shadow_apcb.aqm,
+ matrix_mdev->shadow_apcb.adm);
+}
+
+static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
+{
+ DECLARE_BITMAP(prev_shadow_adm, AP_DOMAINS);
+
+ bitmap_copy(prev_shadow_adm, matrix_mdev->shadow_apcb.adm, AP_DOMAINS);
+ bitmap_and(matrix_mdev->shadow_apcb.adm, matrix_mdev->matrix.adm,
+ (unsigned long *)matrix_dev->info.adm, AP_DOMAINS);
+
+ return !bitmap_equal(prev_shadow_adm, matrix_mdev->shadow_apcb.adm,
+ AP_DOMAINS);
+}
+
+/*
+ * vfio_ap_mdev_filter_matrix - filter the APQNs assigned to the matrix mdev
+ * to ensure no queue devices are passed through to
+ * the guest that are not bound to the vfio_ap
+ * device driver.
+ *
+ * @matrix_mdev: the matrix mdev whose matrix is to be filtered.
+ *
+ * Note: If an APQN referencing a queue device that is not bound to the vfio_ap
+ * driver, its APID will be filtered from the guest's APCB. The matrix
+ * structure precludes filtering an individual APQN, so its APID will be
+ * filtered.
+ *
+ * Return: a boolean value indicating whether the KVM guest's APCB was changed
+ * by the filtering or not.
+ */
+static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
+ struct ap_matrix_mdev *matrix_mdev)
+{
+ unsigned long apid, apqi, apqn;
+ DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
+ DECLARE_BITMAP(prev_shadow_aqm, AP_DOMAINS);
+ struct vfio_ap_queue *q;
+
+ bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
+ bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
+ vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+
+ /*
+ * Copy the adapters, domains and control domains to the shadow_apcb
+ * from the matrix mdev, but only those that are assigned to the host's
+ * AP configuration.
+ */
+ bitmap_and(matrix_mdev->shadow_apcb.apm, matrix_mdev->matrix.apm,
+ (unsigned long *)matrix_dev->info.apm, AP_DEVICES);
+ bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm,
+ (unsigned long *)matrix_dev->info.aqm, AP_DOMAINS);
+
+ for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+ /*
+ * If the APQN is not bound to the vfio_ap device
+ * driver, then we can't assign it to the guest's
+ * AP configuration. The AP architecture won't
+ * allow filtering of a single APQN, so let's filter
+ * the APID since an adapter represents a physical
+ * hardware device.
+ */
+ apqn = AP_MKQID(apid, apqi);
+ q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
+ if (!q || q->reset_rc) {
+ clear_bit_inv(apid,
+ matrix_mdev->shadow_apcb.apm);
+ break;
+ }
+ }
+ }
+
+ return !bitmap_equal(prev_shadow_apm, matrix_mdev->shadow_apcb.apm,
+ AP_DEVICES) ||
+ !bitmap_equal(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm,
+ AP_DOMAINS);
+}
+
static int vfio_ap_mdev_probe(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev;
@@ -468,20 +703,19 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev)
matrix_mdev->mdev = mdev;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
matrix_mdev->pqap_hook = handle_pqap;
- mutex_lock(&matrix_dev->lock);
- list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
- mutex_unlock(&matrix_dev->lock);
+ vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+ hash_init(matrix_mdev->qtable.queues);
ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
if (ret)
goto err_list;
dev_set_drvdata(&mdev->dev, matrix_mdev);
+ mutex_lock(&matrix_dev->mdevs_lock);
+ list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
+ mutex_unlock(&matrix_dev->mdevs_lock);
return 0;
err_list:
- mutex_lock(&matrix_dev->lock);
- list_del(&matrix_mdev->node);
- mutex_unlock(&matrix_dev->lock);
vfio_uninit_group_dev(&matrix_mdev->vdev);
kfree(matrix_mdev);
err_dec_available:
@@ -489,16 +723,62 @@ err_dec_available:
return ret;
}
+static void vfio_ap_mdev_link_queue(struct ap_matrix_mdev *matrix_mdev,
+ struct vfio_ap_queue *q)
+{
+ if (q) {
+ q->matrix_mdev = matrix_mdev;
+ hash_add(matrix_mdev->qtable.queues, &q->mdev_qnode, q->apqn);
+ }
+}
+
+static void vfio_ap_mdev_link_apqn(struct ap_matrix_mdev *matrix_mdev, int apqn)
+{
+ struct vfio_ap_queue *q;
+
+ q = vfio_ap_find_queue(apqn);
+ vfio_ap_mdev_link_queue(matrix_mdev, q);
+}
+
+static void vfio_ap_unlink_queue_fr_mdev(struct vfio_ap_queue *q)
+{
+ hash_del(&q->mdev_qnode);
+}
+
+static void vfio_ap_unlink_mdev_fr_queue(struct vfio_ap_queue *q)
+{
+ q->matrix_mdev = NULL;
+}
+
+static void vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev)
+{
+ struct vfio_ap_queue *q;
+ unsigned long apid, apqi;
+
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+ AP_DOMAINS) {
+ q = vfio_ap_mdev_get_queue(matrix_mdev,
+ AP_MKQID(apid, apqi));
+ if (q)
+ q->matrix_mdev = NULL;
+ }
+ }
+}
+
static void vfio_ap_mdev_remove(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
vfio_unregister_group_dev(&matrix_mdev->vdev);
- mutex_lock(&matrix_dev->lock);
- vfio_ap_mdev_reset_queues(matrix_mdev);
+ mutex_lock(&matrix_dev->guests_lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+ vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_unlink_fr_queues(matrix_mdev);
list_del(&matrix_mdev->node);
- mutex_unlock(&matrix_dev->lock);
+ mutex_unlock(&matrix_dev->mdevs_lock);
+ mutex_unlock(&matrix_dev->guests_lock);
vfio_uninit_group_dev(&matrix_mdev->vdev);
kfree(matrix_mdev);
atomic_inc(&matrix_dev->available_instances);
@@ -547,141 +827,48 @@ static struct attribute_group *vfio_ap_mdev_type_groups[] = {
NULL,
};
-struct vfio_ap_queue_reserved {
- unsigned long *apid;
- unsigned long *apqi;
- bool reserved;
-};
+#define MDEV_SHARING_ERR "Userspace may not re-assign queue %02lx.%04lx " \
+ "already assigned to %s"
-/**
- * vfio_ap_has_queue - determines if the AP queue containing the target in @data
- *
- * @dev: an AP queue device
- * @data: a struct vfio_ap_queue_reserved reference
- *
- * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
- * apid or apqi specified in @data:
- *
- * - If @data contains both an apid and apqi value, then @data will be flagged
- * as reserved if the APID and APQI fields for the AP queue device matches
- *
- * - If @data contains only an apid value, @data will be flagged as
- * reserved if the APID field in the AP queue device matches
- *
- * - If @data contains only an apqi value, @data will be flagged as
- * reserved if the APQI field in the AP queue device matches
- *
- * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
- * @data does not contain either an apid or apqi.
- */
-static int vfio_ap_has_queue(struct device *dev, void *data)
+static void vfio_ap_mdev_log_sharing_err(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm,
+ unsigned long *aqm)
{
- struct vfio_ap_queue_reserved *qres = data;
- struct ap_queue *ap_queue = to_ap_queue(dev);
- ap_qid_t qid;
- unsigned long id;
-
- if (qres->apid && qres->apqi) {
- qid = AP_MKQID(*qres->apid, *qres->apqi);
- if (qid == ap_queue->qid)
- qres->reserved = true;
- } else if (qres->apid && !qres->apqi) {
- id = AP_QID_CARD(ap_queue->qid);
- if (id == *qres->apid)
- qres->reserved = true;
- } else if (!qres->apid && qres->apqi) {
- id = AP_QID_QUEUE(ap_queue->qid);
- if (id == *qres->apqi)
- qres->reserved = true;
- } else {
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
- * @apid or @aqpi is reserved
- *
- * @apid: an AP adapter ID
- * @apqi: an AP queue index
- *
- * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
- * driver according to the following rules:
- *
- * - If both @apid and @apqi are not NULL, then there must be an AP queue
- * device bound to the vfio_ap driver with the APQN identified by @apid and
- * @apqi
- *
- * - If only @apid is not NULL, then there must be an AP queue device bound
- * to the vfio_ap driver with an APQN containing @apid
- *
- * - If only @apqi is not NULL, then there must be an AP queue device bound
- * to the vfio_ap driver with an APQN containing @apqi
- *
- * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
- */
-static int vfio_ap_verify_queue_reserved(unsigned long *apid,
- unsigned long *apqi)
-{
- int ret;
- struct vfio_ap_queue_reserved qres;
-
- qres.apid = apid;
- qres.apqi = apqi;
- qres.reserved = false;
-
- ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
- &qres, vfio_ap_has_queue);
- if (ret)
- return ret;
-
- if (qres.reserved)
- return 0;
-
- return -EADDRNOTAVAIL;
-}
-
-static int
-vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
- unsigned long apid)
-{
- int ret;
- unsigned long apqi;
- unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
-
- if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
- return vfio_ap_verify_queue_reserved(&apid, NULL);
-
- for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
- ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
- if (ret)
- return ret;
- }
+ unsigned long apid, apqi;
+ const struct device *dev = mdev_dev(matrix_mdev->mdev);
+ const char *mdev_name = dev_name(dev);
- return 0;
+ for_each_set_bit_inv(apid, apm, AP_DEVICES)
+ for_each_set_bit_inv(apqi, aqm, AP_DOMAINS)
+ dev_warn(dev, MDEV_SHARING_ERR, apid, apqi, mdev_name);
}
/**
- * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
+ * vfio_ap_mdev_verify_no_sharing - verify APQNs are not shared by matrix mdevs
*
- * @matrix_mdev: the mediated matrix device
+ * @mdev_apm: mask indicating the APIDs of the APQNs to be verified
+ * @mdev_aqm: mask indicating the APQIs of the APQNs to be verified
*
- * Verifies that the APQNs derived from the cross product of the AP adapter IDs
- * and AP queue indexes comprising the AP matrix are not configured for another
+ * Verifies that each APQN derived from the Cartesian product of a bitmap of
+ * AP adapter IDs and AP queue indexes is not configured for any matrix
* mediated device. AP queue sharing is not allowed.
*
- * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
+ * Return: 0 if the APQNs are not shared; otherwise return -EADDRINUSE.
*/
-static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
+static int vfio_ap_mdev_verify_no_sharing(unsigned long *mdev_apm,
+ unsigned long *mdev_aqm)
{
- struct ap_matrix_mdev *lstdev;
+ struct ap_matrix_mdev *matrix_mdev;
DECLARE_BITMAP(apm, AP_DEVICES);
DECLARE_BITMAP(aqm, AP_DOMAINS);
- list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
- if (matrix_mdev == lstdev)
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ /*
+ * If the input apm and aqm are fields of the matrix_mdev
+ * object, then move on to the next matrix_mdev.
+ */
+ if (mdev_apm == matrix_mdev->matrix.apm &&
+ mdev_aqm == matrix_mdev->matrix.aqm)
continue;
memset(apm, 0, sizeof(apm));
@@ -691,14 +878,16 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
* We work on full longs, as we can only exclude the leftover
* bits in non-inverse order. The leftover is all zeros.
*/
- if (!bitmap_and(apm, matrix_mdev->matrix.apm,
- lstdev->matrix.apm, AP_DEVICES))
+ if (!bitmap_and(apm, mdev_apm, matrix_mdev->matrix.apm,
+ AP_DEVICES))
continue;
- if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
- lstdev->matrix.aqm, AP_DOMAINS))
+ if (!bitmap_and(aqm, mdev_aqm, matrix_mdev->matrix.aqm,
+ AP_DOMAINS))
continue;
+ vfio_ap_mdev_log_sharing_err(matrix_mdev, apm, aqm);
+
return -EADDRINUSE;
}
@@ -706,6 +895,41 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
}
/**
+ * vfio_ap_mdev_validate_masks - verify that the APQNs assigned to the mdev are
+ * not reserved for the default zcrypt driver and
+ * are not assigned to another mdev.
+ *
+ * @matrix_mdev: the mdev to which the APQNs being validated are assigned.
+ *
+ * Return: One of the following values:
+ * o the error returned from the ap_apqn_in_matrix_owned_by_def_drv() function,
+ * most likely -EBUSY indicating the ap_perms_mutex lock is already held.
+ * o EADDRNOTAVAIL if an APQN assigned to @matrix_mdev is reserved for the
+ * zcrypt default driver.
+ * o EADDRINUSE if an APQN assigned to @matrix_mdev is assigned to another mdev
+ * o A zero indicating validation succeeded.
+ */
+static int vfio_ap_mdev_validate_masks(struct ap_matrix_mdev *matrix_mdev)
+{
+ if (ap_apqn_in_matrix_owned_by_def_drv(matrix_mdev->matrix.apm,
+ matrix_mdev->matrix.aqm))
+ return -EADDRNOTAVAIL;
+
+ return vfio_ap_mdev_verify_no_sharing(matrix_mdev->matrix.apm,
+ matrix_mdev->matrix.aqm);
+}
+
+static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid)
+{
+ unsigned long apqi;
+
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS)
+ vfio_ap_mdev_link_apqn(matrix_mdev,
+ AP_MKQID(apid, apqi));
+}
+
+/**
* assign_adapter_store - parses the APID from @buf and sets the
* corresponding bit in the mediated matrix device's APM
*
@@ -734,6 +958,10 @@ static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
* An APQN derived from the cross product of the APID being assigned
* and the APQIs previously assigned is being used by another mediated
* matrix device
+ *
+ * 5. -EAGAIN
+ * A lock required to validate the mdev's AP configuration could not
+ * be obtained.
*/
static ssize_t assign_adapter_store(struct device *dev,
struct device_attribute *attr,
@@ -741,15 +969,11 @@ static ssize_t assign_adapter_store(struct device *dev,
{
int ret;
unsigned long apid;
+ DECLARE_BITMAP(apm_delta, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
- mutex_lock(&matrix_dev->lock);
-
- /* If the KVM guest is running, disallow assignment of adapter */
- if (matrix_mdev->kvm) {
- ret = -EBUSY;
- goto done;
- }
+ mutex_lock(&ap_perms_mutex);
+ get_update_locks_for_mdev(matrix_mdev);
ret = kstrtoul(buf, 0, &apid);
if (ret)
@@ -760,33 +984,97 @@ static ssize_t assign_adapter_store(struct device *dev,
goto done;
}
- /*
- * Set the bit in the AP mask (APM) corresponding to the AP adapter
- * number (APID). The bits in the mask, from most significant to least
- * significant bit, correspond to APIDs 0-255.
- */
- ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
- if (ret)
+ set_bit_inv(apid, matrix_mdev->matrix.apm);
+
+ ret = vfio_ap_mdev_validate_masks(matrix_mdev);
+ if (ret) {
+ clear_bit_inv(apid, matrix_mdev->matrix.apm);
goto done;
+ }
- set_bit_inv(apid, matrix_mdev->matrix.apm);
+ vfio_ap_mdev_link_adapter(matrix_mdev, apid);
+ memset(apm_delta, 0, sizeof(apm_delta));
+ set_bit_inv(apid, apm_delta);
- ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
- if (ret)
- goto share_err;
+ if (vfio_ap_mdev_filter_matrix(apm_delta,
+ matrix_mdev->matrix.aqm, matrix_mdev))
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
ret = count;
- goto done;
-
-share_err:
- clear_bit_inv(apid, matrix_mdev->matrix.apm);
done:
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_mdev(matrix_mdev);
+ mutex_unlock(&ap_perms_mutex);
return ret;
}
static DEVICE_ATTR_WO(assign_adapter);
+static struct vfio_ap_queue
+*vfio_ap_unlink_apqn_fr_mdev(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid, unsigned long apqi)
+{
+ struct vfio_ap_queue *q = NULL;
+
+ q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi));
+ /* If the queue is assigned to the matrix mdev, unlink it. */
+ if (q)
+ vfio_ap_unlink_queue_fr_mdev(q);
+
+ return q;
+}
+
+/**
+ * vfio_ap_mdev_unlink_adapter - unlink all queues associated with unassigned
+ * adapter from the matrix mdev to which the
+ * adapter was assigned.
+ * @matrix_mdev: the matrix mediated device to which the adapter was assigned.
+ * @apid: the APID of the unassigned adapter.
+ * @qtable: table for storing queues associated with unassigned adapter.
+ */
+static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid,
+ struct ap_queue_table *qtable)
+{
+ unsigned long apqi;
+ struct vfio_ap_queue *q;
+
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
+ q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
+
+ if (q && qtable) {
+ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
+ test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
+ hash_add(qtable->queues, &q->mdev_qnode,
+ q->apqn);
+ }
+ }
+}
+
+static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid)
+{
+ int loop_cursor;
+ struct vfio_ap_queue *q;
+ struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+
+ hash_init(qtable->queues);
+ vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable);
+
+ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
+ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ }
+
+ vfio_ap_mdev_reset_queues(qtable);
+
+ hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ vfio_ap_unlink_mdev_fr_queue(q);
+ hash_del(&q->mdev_qnode);
+ }
+
+ kfree(qtable);
+}
+
/**
* unassign_adapter_store - parses the APID from @buf and clears the
* corresponding bit in the mediated matrix device's APM
@@ -810,13 +1098,7 @@ static ssize_t unassign_adapter_store(struct device *dev,
unsigned long apid;
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
- mutex_lock(&matrix_dev->lock);
-
- /* If the KVM guest is running, disallow unassignment of adapter */
- if (matrix_mdev->kvm) {
- ret = -EBUSY;
- goto done;
- }
+ get_update_locks_for_mdev(matrix_mdev);
ret = kstrtoul(buf, 0, &apid);
if (ret)
@@ -828,31 +1110,22 @@ static ssize_t unassign_adapter_store(struct device *dev,
}
clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
+ vfio_ap_mdev_hot_unplug_adapter(matrix_mdev, apid);
ret = count;
done:
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_mdev(matrix_mdev);
return ret;
}
static DEVICE_ATTR_WO(unassign_adapter);
-static int
-vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
- unsigned long apqi)
+static void vfio_ap_mdev_link_domain(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apqi)
{
- int ret;
unsigned long apid;
- unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
-
- if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
- return vfio_ap_verify_queue_reserved(NULL, &apqi);
- for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
- ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
- if (ret)
- return ret;
- }
-
- return 0;
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES)
+ vfio_ap_mdev_link_apqn(matrix_mdev,
+ AP_MKQID(apid, apqi));
}
/**
@@ -884,6 +1157,10 @@ vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
* An APQN derived from the cross product of the APQI being assigned
* and the APIDs previously assigned is being used by another mediated
* matrix device
+ *
+ * 5. -EAGAIN
+ * The lock required to validate the mdev's AP configuration could not
+ * be obtained.
*/
static ssize_t assign_domain_store(struct device *dev,
struct device_attribute *attr,
@@ -891,47 +1168,89 @@ static ssize_t assign_domain_store(struct device *dev,
{
int ret;
unsigned long apqi;
+ DECLARE_BITMAP(aqm_delta, AP_DOMAINS);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
- unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
- mutex_lock(&matrix_dev->lock);
-
- /* If the KVM guest is running, disallow assignment of domain */
- if (matrix_mdev->kvm) {
- ret = -EBUSY;
- goto done;
- }
+ mutex_lock(&ap_perms_mutex);
+ get_update_locks_for_mdev(matrix_mdev);
ret = kstrtoul(buf, 0, &apqi);
if (ret)
goto done;
- if (apqi > max_apqi) {
+
+ if (apqi > matrix_mdev->matrix.aqm_max) {
ret = -ENODEV;
goto done;
}
- ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
- if (ret)
+ set_bit_inv(apqi, matrix_mdev->matrix.aqm);
+
+ ret = vfio_ap_mdev_validate_masks(matrix_mdev);
+ if (ret) {
+ clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
goto done;
+ }
- set_bit_inv(apqi, matrix_mdev->matrix.aqm);
+ vfio_ap_mdev_link_domain(matrix_mdev, apqi);
+ memset(aqm_delta, 0, sizeof(aqm_delta));
+ set_bit_inv(apqi, aqm_delta);
- ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
- if (ret)
- goto share_err;
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta,
+ matrix_mdev))
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
ret = count;
- goto done;
-
-share_err:
- clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
done:
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_mdev(matrix_mdev);
+ mutex_unlock(&ap_perms_mutex);
return ret;
}
static DEVICE_ATTR_WO(assign_domain);
+static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apqi,
+ struct ap_queue_table *qtable)
+{
+ unsigned long apid;
+ struct vfio_ap_queue *q;
+
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
+ q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
+
+ if (q && qtable) {
+ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
+ test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
+ hash_add(qtable->queues, &q->mdev_qnode,
+ q->apqn);
+ }
+ }
+}
+
+static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apqi)
+{
+ int loop_cursor;
+ struct vfio_ap_queue *q;
+ struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+
+ hash_init(qtable->queues);
+ vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable);
+
+ if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
+ clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ }
+
+ vfio_ap_mdev_reset_queues(qtable);
+
+ hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ vfio_ap_unlink_mdev_fr_queue(q);
+ hash_del(&q->mdev_qnode);
+ }
+
+ kfree(qtable);
+}
/**
* unassign_domain_store - parses the APQI from @buf and clears the
@@ -956,13 +1275,7 @@ static ssize_t unassign_domain_store(struct device *dev,
unsigned long apqi;
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
- mutex_lock(&matrix_dev->lock);
-
- /* If the KVM guest is running, disallow unassignment of domain */
- if (matrix_mdev->kvm) {
- ret = -EBUSY;
- goto done;
- }
+ get_update_locks_for_mdev(matrix_mdev);
ret = kstrtoul(buf, 0, &apqi);
if (ret)
@@ -974,10 +1287,11 @@ static ssize_t unassign_domain_store(struct device *dev,
}
clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
+ vfio_ap_mdev_hot_unplug_domain(matrix_mdev, apqi);
ret = count;
done:
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_mdev(matrix_mdev);
return ret;
}
static DEVICE_ATTR_WO(unassign_domain);
@@ -1004,13 +1318,7 @@ static ssize_t assign_control_domain_store(struct device *dev,
unsigned long id;
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
- mutex_lock(&matrix_dev->lock);
-
- /* If the KVM guest is running, disallow assignment of control domain */
- if (matrix_mdev->kvm) {
- ret = -EBUSY;
- goto done;
- }
+ get_update_locks_for_mdev(matrix_mdev);
ret = kstrtoul(buf, 0, &id);
if (ret)
@@ -1027,9 +1335,12 @@ static ssize_t assign_control_domain_store(struct device *dev,
* number of control domains that can be assigned.
*/
set_bit_inv(id, matrix_mdev->matrix.adm);
+ if (vfio_ap_mdev_filter_cdoms(matrix_mdev))
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+
ret = count;
done:
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_mdev(matrix_mdev);
return ret;
}
static DEVICE_ATTR_WO(assign_control_domain);
@@ -1055,28 +1366,28 @@ static ssize_t unassign_control_domain_store(struct device *dev,
int ret;
unsigned long domid;
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
- unsigned long max_domid = matrix_mdev->matrix.adm_max;
- mutex_lock(&matrix_dev->lock);
-
- /* If a KVM guest is running, disallow unassignment of control domain */
- if (matrix_mdev->kvm) {
- ret = -EBUSY;
- goto done;
- }
+ get_update_locks_for_mdev(matrix_mdev);
ret = kstrtoul(buf, 0, &domid);
if (ret)
goto done;
- if (domid > max_domid) {
+
+ if (domid > matrix_mdev->matrix.adm_max) {
ret = -ENODEV;
goto done;
}
clear_bit_inv(domid, matrix_mdev->matrix.adm);
+
+ if (test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) {
+ clear_bit_inv(domid, matrix_mdev->shadow_apcb.adm);
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ }
+
ret = count;
done:
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_mdev(matrix_mdev);
return ret;
}
static DEVICE_ATTR_WO(unassign_control_domain);
@@ -1092,40 +1403,36 @@ static ssize_t control_domains_show(struct device *dev,
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
unsigned long max_domid = matrix_mdev->matrix.adm_max;
- mutex_lock(&matrix_dev->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
n = sprintf(bufpos, "%04lx\n", id);
bufpos += n;
nchars += n;
}
- mutex_unlock(&matrix_dev->lock);
+ mutex_unlock(&matrix_dev->mdevs_lock);
return nchars;
}
static DEVICE_ATTR_RO(control_domains);
-static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t vfio_ap_mdev_matrix_show(struct ap_matrix *matrix, char *buf)
{
- struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
char *bufpos = buf;
unsigned long apid;
unsigned long apqi;
unsigned long apid1;
unsigned long apqi1;
- unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
- unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
+ unsigned long napm_bits = matrix->apm_max + 1;
+ unsigned long naqm_bits = matrix->aqm_max + 1;
int nchars = 0;
int n;
- apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
- apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
-
- mutex_lock(&matrix_dev->lock);
+ apid1 = find_first_bit_inv(matrix->apm, napm_bits);
+ apqi1 = find_first_bit_inv(matrix->aqm, naqm_bits);
if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
- for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
- for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
+ for_each_set_bit_inv(apid, matrix->apm, napm_bits) {
+ for_each_set_bit_inv(apqi, matrix->aqm,
naqm_bits) {
n = sprintf(bufpos, "%02lx.%04lx\n", apid,
apqi);
@@ -1134,25 +1441,50 @@ static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
}
}
} else if (apid1 < napm_bits) {
- for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
+ for_each_set_bit_inv(apid, matrix->apm, napm_bits) {
n = sprintf(bufpos, "%02lx.\n", apid);
bufpos += n;
nchars += n;
}
} else if (apqi1 < naqm_bits) {
- for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
+ for_each_set_bit_inv(apqi, matrix->aqm, naqm_bits) {
n = sprintf(bufpos, ".%04lx\n", apqi);
bufpos += n;
nchars += n;
}
}
- mutex_unlock(&matrix_dev->lock);
+ return nchars;
+}
+
+static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t nchars;
+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
+
+ mutex_lock(&matrix_dev->mdevs_lock);
+ nchars = vfio_ap_mdev_matrix_show(&matrix_mdev->matrix, buf);
+ mutex_unlock(&matrix_dev->mdevs_lock);
return nchars;
}
static DEVICE_ATTR_RO(matrix);
+static ssize_t guest_matrix_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t nchars;
+ struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
+
+ mutex_lock(&matrix_dev->mdevs_lock);
+ nchars = vfio_ap_mdev_matrix_show(&matrix_mdev->shadow_apcb, buf);
+ mutex_unlock(&matrix_dev->mdevs_lock);
+
+ return nchars;
+}
+static DEVICE_ATTR_RO(guest_matrix);
+
static struct attribute *vfio_ap_mdev_attrs[] = {
&dev_attr_assign_adapter.attr,
&dev_attr_unassign_adapter.attr,
@@ -1162,6 +1494,7 @@ static struct attribute *vfio_ap_mdev_attrs[] = {
&dev_attr_unassign_control_domain.attr,
&dev_attr_control_domains.attr,
&dev_attr_matrix.attr,
+ &dev_attr_guest_matrix.attr,
NULL,
};
@@ -1194,26 +1527,20 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
up_write(&kvm->arch.crypto.pqap_hook_rwsem);
- mutex_lock(&kvm->lock);
- mutex_lock(&matrix_dev->lock);
+ get_update_locks_for_kvm(kvm);
list_for_each_entry(m, &matrix_dev->mdev_list, node) {
if (m != matrix_mdev && m->kvm == kvm) {
- mutex_unlock(&kvm->lock);
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_kvm(kvm);
return -EPERM;
}
}
kvm_get_kvm(kvm);
matrix_mdev->kvm = kvm;
- kvm_arch_crypto_set_masks(kvm,
- matrix_mdev->matrix.apm,
- matrix_mdev->matrix.aqm,
- matrix_mdev->matrix.adm);
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
- mutex_unlock(&kvm->lock);
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_kvm(kvm);
}
return 0;
@@ -1243,36 +1570,36 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
kvm->arch.crypto.pqap_hook = NULL;
up_write(&kvm->arch.crypto.pqap_hook_rwsem);
- mutex_lock(&kvm->lock);
- mutex_lock(&matrix_dev->lock);
+ get_update_locks_for_kvm(kvm);
kvm_arch_crypto_clear_masks(kvm);
- vfio_ap_mdev_reset_queues(matrix_mdev);
+ vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
kvm_put_kvm(kvm);
matrix_mdev->kvm = NULL;
- mutex_unlock(&kvm->lock);
- mutex_unlock(&matrix_dev->lock);
+ release_update_locks_for_kvm(kvm);
}
}
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
{
- struct device *dev;
+ struct ap_queue *queue;
struct vfio_ap_queue *q = NULL;
- dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
- &apqn, match_apqn);
- if (dev) {
- q = dev_get_drvdata(dev);
- put_device(dev);
- }
+ queue = ap_get_qdev(apqn);
+ if (!queue)
+ return NULL;
+
+ if (queue->ap_dev.device.driver == &matrix_dev->vfio_ap_drv->driver)
+ q = dev_get_drvdata(&queue->ap_dev.device);
+
+ put_device(&queue->ap_dev.device);
return q;
}
-int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
- unsigned int retry)
+static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
+ unsigned int retry)
{
struct ap_queue_status status;
int ret;
@@ -1280,9 +1607,9 @@ int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
if (!q)
return 0;
-
retry_zapq:
status = ap_zapq(q->apqn);
+ q->reset_rc = status.response_code;
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
ret = 0;
@@ -1297,12 +1624,17 @@ retry_zapq:
case AP_RESPONSE_Q_NOT_AVAIL:
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
- WARN_ON_ONCE(status.irq_enabled);
+ WARN_ONCE(status.irq_enabled,
+ "PQAP/ZAPQ for %02x.%04x failed with rc=%u while IRQ enabled",
+ AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
+ status.response_code);
ret = -EBUSY;
goto free_resources;
default:
/* things are really broken, give up */
- WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
+ WARN(true,
+ "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
+ AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
status.response_code);
return -EIO;
}
@@ -1314,7 +1646,8 @@ retry_zapq:
msleep(20);
status = ap_tapq(q->apqn, NULL);
}
- WARN_ON_ONCE(retry2 <= 0);
+ WARN_ONCE(retry2 <= 0, "unable to verify reset of queue %02x.%04x",
+ AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn));
free_resources:
vfio_ap_free_aqic_resources(q);
@@ -1322,27 +1655,20 @@ free_resources:
return ret;
}
-static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
+static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
{
- int ret;
- int rc = 0;
- unsigned long apid, apqi;
+ int ret, loop_cursor, rc = 0;
struct vfio_ap_queue *q;
- for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
- matrix_mdev->matrix.apm_max + 1) {
- for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
- matrix_mdev->matrix.aqm_max + 1) {
- q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
- ret = vfio_ap_mdev_reset_queue(q, 1);
- /*
- * Regardless whether a queue turns out to be busy, or
- * is not operational, we need to continue resetting
- * the remaining queues.
- */
- if (ret)
- rc = ret;
- }
+ hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ ret = vfio_ap_mdev_reset_queue(q, 1);
+ /*
+ * Regardless whether a queue turns out to be busy, or
+ * is not operational, we need to continue resetting
+ * the remaining queues.
+ */
+ if (ret)
+ rc = ret;
}
return rc;
@@ -1394,23 +1720,79 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
container_of(vdev, struct ap_matrix_mdev, vdev);
int ret;
- mutex_lock(&matrix_dev->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
ret = vfio_ap_mdev_get_device_info(arg);
break;
case VFIO_DEVICE_RESET:
- ret = vfio_ap_mdev_reset_queues(matrix_mdev);
+ ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
break;
default:
ret = -EOPNOTSUPP;
break;
}
- mutex_unlock(&matrix_dev->lock);
+ mutex_unlock(&matrix_dev->mdevs_lock);
return ret;
}
+static struct ap_matrix_mdev *vfio_ap_mdev_for_queue(struct vfio_ap_queue *q)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+ unsigned long apid = AP_QID_CARD(q->apqn);
+ unsigned long apqi = AP_QID_QUEUE(q->apqn);
+
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
+ test_bit_inv(apqi, matrix_mdev->matrix.aqm))
+ return matrix_mdev;
+ }
+
+ return NULL;
+}
+
+static ssize_t status_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t nchars = 0;
+ struct vfio_ap_queue *q;
+ struct ap_matrix_mdev *matrix_mdev;
+ struct ap_device *apdev = to_ap_dev(dev);
+
+ mutex_lock(&matrix_dev->mdevs_lock);
+ q = dev_get_drvdata(&apdev->device);
+ matrix_mdev = vfio_ap_mdev_for_queue(q);
+
+ if (matrix_mdev) {
+ if (matrix_mdev->kvm)
+ nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
+ AP_QUEUE_IN_USE);
+ else
+ nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
+ AP_QUEUE_ASSIGNED);
+ } else {
+ nchars = scnprintf(buf, PAGE_SIZE, "%s\n",
+ AP_QUEUE_UNASSIGNED);
+ }
+
+ mutex_unlock(&matrix_dev->mdevs_lock);
+
+ return nchars;
+}
+
+static DEVICE_ATTR_RO(status);
+
+static struct attribute *vfio_queue_attrs[] = {
+ &dev_attr_status.attr,
+ NULL,
+};
+
+static const struct attribute_group vfio_queue_attr_group = {
+ .attrs = vfio_queue_attrs,
+};
+
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
.open_device = vfio_ap_mdev_open_device,
.close_device = vfio_ap_mdev_close_device,
@@ -1455,3 +1837,432 @@ void vfio_ap_mdev_unregister(void)
mdev_unregister_device(&matrix_dev->device);
mdev_unregister_driver(&vfio_ap_matrix_driver);
}
+
+int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
+{
+ int ret;
+ struct vfio_ap_queue *q;
+ struct ap_matrix_mdev *matrix_mdev;
+
+ ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
+ if (ret)
+ return ret;
+
+ q = kzalloc(sizeof(*q), GFP_KERNEL);
+ if (!q)
+ return -ENOMEM;
+
+ q->apqn = to_ap_queue(&apdev->device)->qid;
+ q->saved_isc = VFIO_AP_ISC_INVALID;
+ matrix_mdev = get_update_locks_by_apqn(q->apqn);
+
+ if (matrix_mdev) {
+ vfio_ap_mdev_link_queue(matrix_mdev, q);
+
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm,
+ matrix_mdev->matrix.aqm,
+ matrix_mdev))
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ }
+ dev_set_drvdata(&apdev->device, q);
+ release_update_locks_for_mdev(matrix_mdev);
+
+ return 0;
+}
+
+void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
+{
+ unsigned long apid, apqi;
+ struct vfio_ap_queue *q;
+ struct ap_matrix_mdev *matrix_mdev;
+
+ sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
+ q = dev_get_drvdata(&apdev->device);
+ get_update_locks_for_queue(q);
+ matrix_mdev = q->matrix_mdev;
+
+ if (matrix_mdev) {
+ vfio_ap_unlink_queue_fr_mdev(q);
+
+ apid = AP_QID_CARD(q->apqn);
+ apqi = AP_QID_QUEUE(q->apqn);
+
+ /*
+ * If the queue is assigned to the guest's APCB, then remove
+ * the adapter's APID from the APCB and hot it into the guest.
+ */
+ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
+ test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
+ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ }
+ }
+
+ vfio_ap_mdev_reset_queue(q, 1);
+ dev_set_drvdata(&apdev->device, NULL);
+ kfree(q);
+ release_update_locks_for_mdev(matrix_mdev);
+}
+
+/**
+ * vfio_ap_mdev_resource_in_use: check whether any of a set of APQNs is
+ * assigned to a mediated device under the control
+ * of the vfio_ap device driver.
+ *
+ * @apm: a bitmap specifying a set of APIDs comprising the APQNs to check.
+ * @aqm: a bitmap specifying a set of APQIs comprising the APQNs to check.
+ *
+ * Return:
+ * * -EADDRINUSE if one or more of the APQNs specified via @apm/@aqm are
+ * assigned to a mediated device under the control of the vfio_ap
+ * device driver.
+ * * Otherwise, return 0.
+ */
+int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm)
+{
+ int ret;
+
+ mutex_lock(&matrix_dev->guests_lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+ ret = vfio_ap_mdev_verify_no_sharing(apm, aqm);
+ mutex_unlock(&matrix_dev->mdevs_lock);
+ mutex_unlock(&matrix_dev->guests_lock);
+
+ return ret;
+}
+
+/**
+ * vfio_ap_mdev_hot_unplug_cfg - hot unplug the adapters, domains and control
+ * domains that have been removed from the host's
+ * AP configuration from a guest.
+ *
+ * @matrix_mdev: an ap_matrix_mdev object attached to a KVM guest.
+ * @aprem: the adapters that have been removed from the host's AP configuration
+ * @aqrem: the domains that have been removed from the host's AP configuration
+ * @cdrem: the control domains that have been removed from the host's AP
+ * configuration.
+ */
+static void vfio_ap_mdev_hot_unplug_cfg(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *aprem,
+ unsigned long *aqrem,
+ unsigned long *cdrem)
+{
+ int do_hotplug = 0;
+
+ if (!bitmap_empty(aprem, AP_DEVICES)) {
+ do_hotplug |= bitmap_andnot(matrix_mdev->shadow_apcb.apm,
+ matrix_mdev->shadow_apcb.apm,
+ aprem, AP_DEVICES);
+ }
+
+ if (!bitmap_empty(aqrem, AP_DOMAINS)) {
+ do_hotplug |= bitmap_andnot(matrix_mdev->shadow_apcb.aqm,
+ matrix_mdev->shadow_apcb.aqm,
+ aqrem, AP_DEVICES);
+ }
+
+ if (!bitmap_empty(cdrem, AP_DOMAINS))
+ do_hotplug |= bitmap_andnot(matrix_mdev->shadow_apcb.adm,
+ matrix_mdev->shadow_apcb.adm,
+ cdrem, AP_DOMAINS);
+
+ if (do_hotplug)
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+}
+
+/**
+ * vfio_ap_mdev_cfg_remove - determines which guests are using the adapters,
+ * domains and control domains that have been removed
+ * from the host AP configuration and unplugs them
+ * from those guests.
+ *
+ * @ap_remove: bitmap specifying which adapters have been removed from the host
+ * config.
+ * @aq_remove: bitmap specifying which domains have been removed from the host
+ * config.
+ * @cd_remove: bitmap specifying which control domains have been removed from
+ * the host config.
+ */
+static void vfio_ap_mdev_cfg_remove(unsigned long *ap_remove,
+ unsigned long *aq_remove,
+ unsigned long *cd_remove)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+ DECLARE_BITMAP(aprem, AP_DEVICES);
+ DECLARE_BITMAP(aqrem, AP_DOMAINS);
+ DECLARE_BITMAP(cdrem, AP_DOMAINS);
+ int do_remove = 0;
+
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ mutex_lock(&matrix_mdev->kvm->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+
+ do_remove |= bitmap_and(aprem, ap_remove,
+ matrix_mdev->matrix.apm,
+ AP_DEVICES);
+ do_remove |= bitmap_and(aqrem, aq_remove,
+ matrix_mdev->matrix.aqm,
+ AP_DOMAINS);
+ do_remove |= bitmap_andnot(cdrem, cd_remove,
+ matrix_mdev->matrix.adm,
+ AP_DOMAINS);
+
+ if (do_remove)
+ vfio_ap_mdev_hot_unplug_cfg(matrix_mdev, aprem, aqrem,
+ cdrem);
+
+ mutex_unlock(&matrix_dev->mdevs_lock);
+ mutex_unlock(&matrix_mdev->kvm->lock);
+ }
+}
+
+/**
+ * vfio_ap_mdev_on_cfg_remove - responds to the removal of adapters, domains and
+ * control domains from the host AP configuration
+ * by unplugging them from the guests that are
+ * using them.
+ * @cur_config_info: the current host AP configuration information
+ * @prev_config_info: the previous host AP configuration information
+ */
+static void vfio_ap_mdev_on_cfg_remove(struct ap_config_info *cur_config_info,
+ struct ap_config_info *prev_config_info)
+{
+ int do_remove;
+ DECLARE_BITMAP(aprem, AP_DEVICES);
+ DECLARE_BITMAP(aqrem, AP_DOMAINS);
+ DECLARE_BITMAP(cdrem, AP_DOMAINS);
+
+ do_remove = bitmap_andnot(aprem,
+ (unsigned long *)prev_config_info->apm,
+ (unsigned long *)cur_config_info->apm,
+ AP_DEVICES);
+ do_remove |= bitmap_andnot(aqrem,
+ (unsigned long *)prev_config_info->aqm,
+ (unsigned long *)cur_config_info->aqm,
+ AP_DEVICES);
+ do_remove |= bitmap_andnot(cdrem,
+ (unsigned long *)prev_config_info->adm,
+ (unsigned long *)cur_config_info->adm,
+ AP_DEVICES);
+
+ if (do_remove)
+ vfio_ap_mdev_cfg_remove(aprem, aqrem, cdrem);
+}
+
+/**
+ * vfio_ap_filter_apid_by_qtype: filter APIDs from an AP mask for adapters that
+ * are older than AP type 10 (CEX4).
+ * @apm: a bitmap of the APIDs to examine
+ * @aqm: a bitmap of the APQIs of the queues to query for the AP type.
+ */
+static void vfio_ap_filter_apid_by_qtype(unsigned long *apm, unsigned long *aqm)
+{
+ bool apid_cleared;
+ struct ap_queue_status status;
+ unsigned long apid, apqi, info;
+ int qtype, qtype_mask = 0xff000000;
+
+ for_each_set_bit_inv(apid, apm, AP_DEVICES) {
+ apid_cleared = false;
+
+ for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+ status = ap_test_queue(AP_MKQID(apid, apqi), 1, &info);
+ switch (status.response_code) {
+ /*
+ * According to the architecture in each case
+ * below, the queue's info should be filled.
+ */
+ case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_BUSY:
+ qtype = info & qtype_mask;
+
+ /*
+ * The vfio_ap device driver only
+ * supports CEX4 and newer adapters, so
+ * remove the APID if the adapter is
+ * older than a CEX4.
+ */
+ if (qtype < AP_DEVICE_TYPE_CEX4) {
+ clear_bit_inv(apid, apm);
+ apid_cleared = true;
+ }
+
+ break;
+
+ default:
+ /*
+ * If we don't know the adapter type,
+ * clear its APID since it can't be
+ * determined whether the vfio_ap
+ * device driver supports it.
+ */
+ clear_bit_inv(apid, apm);
+ apid_cleared = true;
+ break;
+ }
+
+ /*
+ * If we've already cleared the APID from the apm, there
+ * is no need to continue examining the remainin AP
+ * queues to determine the type of the adapter.
+ */
+ if (apid_cleared)
+ continue;
+ }
+ }
+}
+
+/**
+ * vfio_ap_mdev_cfg_add - store bitmaps specifying the adapters, domains and
+ * control domains that have been added to the host's
+ * AP configuration for each matrix mdev to which they
+ * are assigned.
+ *
+ * @apm_add: a bitmap specifying the adapters that have been added to the AP
+ * configuration.
+ * @aqm_add: a bitmap specifying the domains that have been added to the AP
+ * configuration.
+ * @adm_add: a bitmap specifying the control domains that have been added to the
+ * AP configuration.
+ */
+static void vfio_ap_mdev_cfg_add(unsigned long *apm_add, unsigned long *aqm_add,
+ unsigned long *adm_add)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+
+ if (list_empty(&matrix_dev->mdev_list))
+ return;
+
+ vfio_ap_filter_apid_by_qtype(apm_add, aqm_add);
+
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ bitmap_and(matrix_mdev->apm_add,
+ matrix_mdev->matrix.apm, apm_add, AP_DEVICES);
+ bitmap_and(matrix_mdev->aqm_add,
+ matrix_mdev->matrix.aqm, aqm_add, AP_DOMAINS);
+ bitmap_and(matrix_mdev->adm_add,
+ matrix_mdev->matrix.adm, adm_add, AP_DEVICES);
+ }
+}
+
+/**
+ * vfio_ap_mdev_on_cfg_add - responds to the addition of adapters, domains and
+ * control domains to the host AP configuration
+ * by updating the bitmaps that specify what adapters,
+ * domains and control domains have been added so they
+ * can be hot plugged into the guest when the AP bus
+ * scan completes (see vfio_ap_on_scan_complete
+ * function).
+ * @cur_config_info: the current AP configuration information
+ * @prev_config_info: the previous AP configuration information
+ */
+static void vfio_ap_mdev_on_cfg_add(struct ap_config_info *cur_config_info,
+ struct ap_config_info *prev_config_info)
+{
+ bool do_add;
+ DECLARE_BITMAP(apm_add, AP_DEVICES);
+ DECLARE_BITMAP(aqm_add, AP_DOMAINS);
+ DECLARE_BITMAP(adm_add, AP_DOMAINS);
+
+ do_add = bitmap_andnot(apm_add,
+ (unsigned long *)cur_config_info->apm,
+ (unsigned long *)prev_config_info->apm,
+ AP_DEVICES);
+ do_add |= bitmap_andnot(aqm_add,
+ (unsigned long *)cur_config_info->aqm,
+ (unsigned long *)prev_config_info->aqm,
+ AP_DOMAINS);
+ do_add |= bitmap_andnot(adm_add,
+ (unsigned long *)cur_config_info->adm,
+ (unsigned long *)prev_config_info->adm,
+ AP_DOMAINS);
+
+ if (do_add)
+ vfio_ap_mdev_cfg_add(apm_add, aqm_add, adm_add);
+}
+
+/**
+ * vfio_ap_on_cfg_changed - handles notification of changes to the host AP
+ * configuration.
+ *
+ * @cur_cfg_info: the current host AP configuration
+ * @prev_cfg_info: the previous host AP configuration
+ */
+void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info,
+ struct ap_config_info *prev_cfg_info)
+{
+ if (!cur_cfg_info || !prev_cfg_info)
+ return;
+
+ mutex_lock(&matrix_dev->guests_lock);
+
+ vfio_ap_mdev_on_cfg_remove(cur_cfg_info, prev_cfg_info);
+ vfio_ap_mdev_on_cfg_add(cur_cfg_info, prev_cfg_info);
+ memcpy(&matrix_dev->info, cur_cfg_info, sizeof(*cur_cfg_info));
+
+ mutex_unlock(&matrix_dev->guests_lock);
+}
+
+static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
+{
+ bool do_hotplug = false;
+ int filter_domains = 0;
+ int filter_adapters = 0;
+ DECLARE_BITMAP(apm, AP_DEVICES);
+ DECLARE_BITMAP(aqm, AP_DOMAINS);
+
+ mutex_lock(&matrix_mdev->kvm->lock);
+ mutex_lock(&matrix_dev->mdevs_lock);
+
+ filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm,
+ matrix_mdev->apm_add, AP_DEVICES);
+ filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm,
+ matrix_mdev->aqm_add, AP_DOMAINS);
+
+ if (filter_adapters && filter_domains)
+ do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev);
+ else if (filter_adapters)
+ do_hotplug |=
+ vfio_ap_mdev_filter_matrix(apm,
+ matrix_mdev->shadow_apcb.aqm,
+ matrix_mdev);
+ else
+ do_hotplug |=
+ vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm,
+ aqm, matrix_mdev);
+
+ if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add,
+ AP_DOMAINS))
+ do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev);
+
+ if (do_hotplug)
+ vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+
+ mutex_unlock(&matrix_dev->mdevs_lock);
+ mutex_unlock(&matrix_mdev->kvm->lock);
+}
+
+void vfio_ap_on_scan_complete(struct ap_config_info *new_config_info,
+ struct ap_config_info *old_config_info)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+
+ mutex_lock(&matrix_dev->guests_lock);
+
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ if (bitmap_empty(matrix_mdev->apm_add, AP_DEVICES) &&
+ bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS) &&
+ bitmap_empty(matrix_mdev->adm_add, AP_DOMAINS))
+ continue;
+
+ vfio_ap_mdev_hot_plug_cfg(matrix_mdev);
+ bitmap_clear(matrix_mdev->apm_add, 0, AP_DEVICES);
+ bitmap_clear(matrix_mdev->aqm_add, 0, AP_DOMAINS);
+ bitmap_clear(matrix_mdev->adm_add, 0, AP_DOMAINS);
+ }
+
+ mutex_unlock(&matrix_dev->guests_lock);
+}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index d912487175d3..d782cf463eab 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -19,6 +19,7 @@
#include <linux/mutex.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
+#include <linux/hashtable.h>
#include "ap_bus.h"
@@ -32,20 +33,26 @@
* @available_instances: number of mediated matrix devices that can be created
* @info: the struct containing the output from the PQAP(QCI) instruction
* @mdev_list: the list of mediated matrix devices created
- * @lock: mutex for locking the AP matrix device. This lock will be
+ * @mdevs_lock: mutex for locking the AP matrix device. This lock will be
* taken every time we fiddle with state managed by the vfio_ap
* driver, be it using @mdev_list or writing the state of a
* single ap_matrix_mdev device. It's quite coarse but we don't
* expect much contention.
* @vfio_ap_drv: the vfio_ap device driver
+ * @guests_lock: mutex for controlling access to a guest that is using AP
+ * devices passed through by the vfio_ap device driver. This lock
+ * will be taken when the AP devices are plugged into or unplugged
+ * from a guest, and when an ap_matrix_mdev device is added to or
+ * removed from @mdev_list or the list is iterated.
*/
struct ap_matrix_dev {
struct device device;
atomic_t available_instances;
struct ap_config_info info;
struct list_head mdev_list;
- struct mutex lock;
+ struct mutex mdevs_lock; /* serializes access to each ap_matrix_mdev */
struct ap_driver *vfio_ap_drv;
+ struct mutex guests_lock; /* serializes access to each KVM guest */
};
extern struct ap_matrix_dev *matrix_dev;
@@ -75,24 +82,44 @@ struct ap_matrix {
};
/**
+ * struct ap_queue_table - a table of queue objects.
+ *
+ * @queues: a hashtable of queues (struct vfio_ap_queue).
+ */
+struct ap_queue_table {
+ DECLARE_HASHTABLE(queues, 8);
+};
+
+/**
* struct ap_matrix_mdev - Contains the data associated with a matrix mediated
* device.
* @vdev: the vfio device
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
+ * @shadow_apcb: the shadow copy of the APCB field of the KVM guest's CRYCB
* @kvm: the struct holding guest's state
* @pqap_hook: the function pointer to the interception handler for the
* PQAP(AQIC) instruction.
* @mdev: the mediated device
+ * @qtable: table of queues (struct vfio_ap_queue) assigned to the mdev
+ * @apm_add: bitmap of APIDs added to the host's AP configuration
+ * @aqm_add: bitmap of APQIs added to the host's AP configuration
+ * @adm_add: bitmap of control domain numbers added to the host's AP
+ * configuration
*/
struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
+ struct ap_matrix shadow_apcb;
struct kvm *kvm;
crypto_hook pqap_hook;
struct mdev_device *mdev;
+ struct ap_queue_table qtable;
+ DECLARE_BITMAP(apm_add, AP_DEVICES);
+ DECLARE_BITMAP(aqm_add, AP_DOMAINS);
+ DECLARE_BITMAP(adm_add, AP_DOMAINS);
};
/**
@@ -102,6 +129,8 @@ struct ap_matrix_mdev {
* @saved_iova: the notification indicator byte (nib) address
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
+ * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
+ * @reset_rc: the status response code from the last reset of the queue
*/
struct vfio_ap_queue {
struct ap_matrix_mdev *matrix_mdev;
@@ -109,11 +138,21 @@ struct vfio_ap_queue {
int apqn;
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
+ struct hlist_node mdev_qnode;
+ unsigned int reset_rc;
};
int vfio_ap_mdev_register(void);
void vfio_ap_mdev_unregister(void);
-int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
- unsigned int retry);
+
+int vfio_ap_mdev_probe_queue(struct ap_device *queue);
+void vfio_ap_mdev_remove_queue(struct ap_device *queue);
+
+int vfio_ap_mdev_resource_in_use(unsigned long *apm, unsigned long *aqm);
+
+void vfio_ap_on_cfg_changed(struct ap_config_info *new_config_info,
+ struct ap_config_info *old_config_info);
+void vfio_ap_on_scan_complete(struct ap_config_info *new_config_info,
+ struct ap_config_info *old_config_info);
#endif /* _VFIO_AP_PRIVATE_H_ */
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 2b9f5e9985e5..c7b056af9ef0 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -420,6 +420,7 @@ typedef struct elf64_shdr {
#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
+#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
#define NT_ARM_TLS 0x401 /* ARM TLS register */
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
index 34e8b855266f..8141d45df51a 100644
--- a/tools/testing/crypto/chacha20-s390/test-cipher.c
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -252,29 +252,26 @@ static int __init chacha_s390_test_init(void)
memset(plain, 'a', data_size);
get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
- cipher_generic = vmalloc(data_size);
+ cipher_generic = vzalloc(data_size);
if (!cipher_generic) {
pr_info("could not allocate cipher_generic buffer\n");
ret = -2;
goto out;
}
- memset(cipher_generic, 0, data_size);
- cipher_s390 = vmalloc(data_size);
+ cipher_s390 = vzalloc(data_size);
if (!cipher_s390) {
pr_info("could not allocate cipher_s390 buffer\n");
ret = -2;
goto out;
}
- memset(cipher_s390, 0, data_size);
- revert = vmalloc(data_size);
+ revert = vzalloc(data_size);
if (!revert) {
pr_info("could not allocate revert buffer\n");
ret = -2;
goto out;
}
- memset(revert, 0, data_size);
if (debug)
print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,