Diffstat (limited to 'arch/s390')
75 files changed, 1381 insertions, 1045 deletions
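Two tree-wide conversions account for many of the small hunks below: Kconfig help text switches from the old "---help---" marker to plain "help", and the sysctl handlers (appldata_base.c, debug.c) drop the __user annotation on their buffer argument, because the sysctl core now copies data in and out itself and passes every ->proc_handler a kernel buffer. A minimal sketch of a handler on the new prototype; the demo_* names are hypothetical, and only the signature and the proc_dointvec() call mirror the converted appldata handlers:

#include <linux/sysctl.h>

static int demo_interval = 10;	/* hypothetical tunable */

static int demo_interval_handler(struct ctl_table *ctl, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	int rc;

	/*
	 * No copy_from_user()/copy_to_user() here: the core already
	 * staged the data in a kernel buffer, so proc_dointvec() can
	 * parse or fill it directly.
	 */
	rc = proc_dointvec(ctl, write, buffer, lenp, ppos);
	if (rc || !write)
		return rc;
	return demo_interval < 1 ? -EINVAL : 0;
}

This is why the converted handlers in the diff lose only the __user qualifier: the control flow inside them is unchanged.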
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2167bce993ff..194824932a60 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -59,6 +59,7 @@ config KASAN_SHADOW_OFFSET config S390 def_bool y select ARCH_BINFMT_ELF_STATE + select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE @@ -162,7 +163,6 @@ config S390 select HAVE_LIVEPATCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC @@ -696,7 +696,7 @@ menu "I/O subsystem" config QDIO def_tristate y prompt "QDIO support" - ---help--- + help This driver provides the Queued Direct I/O base support for IBM System z. diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 190527560b2c..761fe2b0b2f6 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -7,7 +7,7 @@ config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL select DEBUG_FS - ---help--- + help Say Y here if you want to show the kernel pagetable layout in a debugfs file. This information is only useful for kernel developers who are working in architecture specific areas of the kernel. diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index aa738cad1338..d74a4c7d5df6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -51,10 +51,9 @@ static struct platform_device *appldata_pdev; */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; static struct ctl_table appldata_table[] = { @@ -217,7 +216,7 @@ static void __appldata_vtimer_setup(int cmd) */ static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int timer_active = appldata_timer_active; int rc; @@ -250,7 +249,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write, */ static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int interval = appldata_interval; int rc; @@ -280,7 +279,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write, */ static int appldata_generic_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; struct list_head *lh; diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index e68136c3c23a..21c3147bd92a 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -29,10 +29,6 @@ * the structure version (product ID, see appldata_base.c) needs to be changed * as well and all documentation and z/VM applications using it must be * updated. 
- * - * The record layout is documented in the Linux for zSeries Device Drivers - * book: - * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ struct appldata_mem_data { u64 timestamp; diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 8bc14b0d1def..59c282ca002f 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -25,10 +25,6 @@ * This is accessed as binary data by z/VM. If changes to it can't be avoided, * the structure version (product ID, see appldata_base.c) needs to be changed * as well and all documentation and z/VM applications using it must be updated. - * - * The record layout is documented in the Linux for zSeries Device Drivers - * book: - * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ struct appldata_net_sum_data { u64 timestamp; diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 8bf46d705957..5503217366ec 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -32,10 +32,6 @@ * the structure version (product ID, see appldata_base.c) needs to be changed * as well and all documentation and z/VM applications using it must be * updated. - * - * The record layout is documented in the Linux for zSeries Device Drivers - * book: - * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml */ struct appldata_os_per_cpu { u32 per_cpu_user; /* timer ticks spent in user mode */ diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 357adad991d2..8e222a666025 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -2,12 +2,12 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/ctype.h> +#include <linux/pgtable.h> #include <asm/ebcdic.h> #include <asm/sclp.h> #include <asm/sections.h> #include <asm/boot_data.h> #include <asm/facility.h> -#include <asm/pgtable.h> #include <asm/uv.h> #include "boot.h" diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5591243d673e..d4442163ffa9 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -2,8 +2,8 @@ /* * Copyright IBM Corp. 
2019 */ +#include <linux/pgtable.h> #include <asm/mem_detect.h> -#include <asm/pgtable.h> #include <asm/cpacf.h> #include <asm/timex.h> #include <asm/sclp.h> diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 7c15542d3685..698b1e6d3c14 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -27,7 +27,7 @@ #include "sha.h" -static int sha1_init(struct shash_desc *desc) +static int s390_sha1_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); @@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc) return 0; } -static int sha1_export(struct shash_desc *desc, void *out) +static int s390_sha1_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha1_state *octx = out; @@ -53,7 +53,7 @@ static int sha1_export(struct shash_desc *desc, void *out) return 0; } -static int sha1_import(struct shash_desc *desc, const void *in) +static int s390_sha1_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha1_state *ictx = in; @@ -67,11 +67,11 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, + .init = s390_sha1_init, .update = s390_sha_update, .final = s390_sha_final, - .export = sha1_export, - .import = sha1_import, + .export = s390_sha1_export, + .import = s390_sha1_import, .descsize = sizeof(struct s390_sha_ctx), .statesize = sizeof(struct sha1_state), .base = { diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 865ce1cb86d5..3cfe1eb89838 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -11,6 +11,7 @@ #include <linux/device.h> #include <linux/mod_devicetable.h> +#include <asm/chsc.h> #include <asm/fcx.h> #include <asm/irq.h> #include <asm/schid.h> @@ -236,4 +237,8 @@ extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int); u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx); +int ccw_device_pnso(struct ccw_device *cdev, + struct chsc_pnso_area *pnso_area, + struct chsc_pnso_resume_token resume_token, + int cnc); #endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 7293c139dd79..ad3acb1e882b 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -36,11 +36,6 @@ struct ccwgroup_device { * @set_online: function called when device is set online * @set_offline: function called when device is set offline * @shutdown: function called when device is shut down - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @driver: embedded driver structure * @ccw_driver: supported ccw_driver (optional) */ @@ -50,11 +45,6 @@ struct ccwgroup_driver { int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); void (*shutdown)(struct ccwgroup_device *); - int (*prepare) (struct ccwgroup_device *); - void (*complete) (struct ccwgroup_device *); - int (*freeze)(struct ccwgroup_device *); - int (*thaw) (struct ccwgroup_device *); - int (*restore)(struct ccwgroup_device *); struct device_driver driver; struct ccw_driver *ccw_driver; 
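The ccw_device_pnso() prototype just added to ccwdev.h, together with the chsc_pnso_* structures introduced by the new asm/chsc.h further down, replaces the qdio_pnso_brinfo() callback interface that this series deletes from qdio.h: the caller now owns the response page and walks the address entries itself. A hedged sketch of a caller, assuming a valid ccw_device and process context; demo_fetch_nai() and its logging are illustrative, not taken from the series:

#include <linux/gfp.h>
#include <linux/printk.h>
#include <asm/ccwdev.h>	/* pulls in asm/chsc.h as of this series */

static int demo_fetch_nai(struct ccw_device *cdev)
{
	struct chsc_pnso_resume_token token = { 0 };	/* zeroes start a fresh scan */
	struct chsc_pnso_area *area;
	int i, rc;

	/* struct chsc_pnso_area is __aligned(PAGE_SIZE), so a full page
	 * is the natural allocation for the response area. */
	area = (struct chsc_pnso_area *)get_zeroed_page(GFP_KERNEL);
	if (!area)
		return -ENOMEM;
	rc = ccw_device_pnso(cdev, area, token, 0 /* cnc off */);
	if (!rc)
		for (i = 0; i < area->naihdr.naids; i++)
			pr_info("NIT %016llx: %pM VLAN %u\n",
				(unsigned long long)area->entries[i].nit,
				area->entries[i].addr_lnid.mac,
				area->entries[i].addr_lnid.lnid);
	free_page((unsigned long)area);
	return rc;
}

If more entries exist than fit in one response, the resume token returned in the naihdr can be fed back into a follow-up call to continue the scan.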
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 91e376b0d28c..6d01c96aeb5c 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -39,25 +39,6 @@ csum_partial(const void *buff, int len, __wsum sum) return sum; } -/* - * the same as csum_partial_copy, but copies from user space. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - * - * Copy from userspace and compute checksum. - */ -static inline __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, - int *err_ptr) -{ - if (unlikely(copy_from_user(dst, src, len))) - *err_ptr = -EFAULT; - return csum_partial(dst, len, sum); -} - - static inline __wsum csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) { diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h new file mode 100644 index 000000000000..36ce2d25a5fc --- /dev/null +++ b/arch/s390/include/asm/chsc.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): Alexandra Winter <wintera@linux.ibm.com> + * + * Interface for Channel Subsystem Call + */ +#ifndef _ASM_S390_CHSC_H +#define _ASM_S390_CHSC_H + +#include <uapi/asm/chsc.h> + +/** + * struct chsc_pnso_naid_l2 - network address information descriptor + * @nit: Network interface token + * @addr_lnid: network address and logical network id (VLAN ID) + */ +struct chsc_pnso_naid_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + +struct chsc_pnso_resume_token { + u64 t1; + u64 t2; +} __packed; + +struct chsc_pnso_naihdr { + struct chsc_pnso_resume_token resume_token; + u32:32; + u32 instance; + u32:24; + u8 naids; + u32 reserved[3]; +} __packed; + +struct chsc_pnso_area { + struct chsc_header request; + u8:2; + u8 m:1; + u8:5; + u8:2; + u8 ssid:2; + u8 fmt:4; + u16 sch; + u8:8; + u8 cssid; + u16:16; + u8 oc; + u32:24; + struct chsc_pnso_resume_token resume_token; + u32 n:1; + u32:31; + u32 reserved[3]; + struct chsc_header response; + u32:32; + struct chsc_pnso_naihdr naihdr; + struct chsc_pnso_naid_l2 entries[0]; +} __packed __aligned(PAGE_SIZE); + +#endif /* _ASM_S390_CHSC_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index de8f0bf5f238..60f9241e5e4a 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -9,8 +9,8 @@ #ifndef _ASM_S390_HUGETLB_H #define _ASM_S390_HUGETLB_H +#include <linux/pgtable.h> #include <asm/page.h> -#include <asm/pgtable.h> #define hugetlb_free_pgd_range free_pgd_range #define hugepages_supported() (MACHINE_HAS_EDAT1) @@ -21,13 +21,6 @@ pte_t huge_ptep_get(pte_t *ptep); pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -static inline bool is_hugepage_only_range(struct mm_struct *mm, - unsigned long addr, - unsigned long len) -{ - return false; -} - /* * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. 
@@ -46,6 +39,7 @@ static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_arch_1, &page->flags); } +#define arch_clear_hugepage_flags arch_clear_hugepage_flags static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index 5a16f500515a..da014e4f8113 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -26,7 +26,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 -void __iomem *ioremap(unsigned long offset, unsigned long size); +void __iomem *ioremap(phys_addr_t addr, size_t size); void iounmap(volatile void __iomem *addr); static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index b63bd66404b8..7d5cfdda5277 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -21,6 +21,7 @@ struct ipl_parameter_block { struct ipl_pb0_common common; struct ipl_pb0_fcp fcp; struct ipl_pb0_ccw ccw; + struct ipl_pb0_nvme nvme; char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)]; }; } __packed __aligned(PAGE_SIZE); @@ -30,6 +31,11 @@ struct ipl_parameter_block { #define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \ sizeof(struct ipl_pb0_fcp)) #define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp)) + +#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \ + sizeof(struct ipl_pb0_nvme)) +#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme)) + #define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \ sizeof(struct ipl_pb0_ccw)) #define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw)) @@ -59,6 +65,7 @@ enum ipl_type { IPL_TYPE_FCP = 4, IPL_TYPE_FCP_DUMP = 8, IPL_TYPE_NSS = 16, + IPL_TYPE_NVME = 32, }; struct ipl_info @@ -74,6 +81,10 @@ struct ipl_info u64 lun; } fcp; struct { + u32 fid; + u32 nsid; + } nvme; + struct { char name[NSS_NAME_SIZE + 1]; } nss; } data; diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 70930fe5c496..89d6886040c8 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,8 +2,6 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#include <asm/pgtable.h> - #ifdef CONFIG_KASAN #define KASAN_SHADOW_SCALE_SHIFT 3 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d6bcd34f3ec3..cee3cb6455a2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -375,6 +375,8 @@ struct kvm_vcpu_stat { u64 halt_poll_invalid; u64 halt_no_poll_steal; u64 halt_wakeup; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 instruction_lctl; u64 instruction_lctlg; u64 instruction_stctl; @@ -971,17 +973,19 @@ struct kvm_arch_async_pf { unsigned long pfault_token; }; -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {} + void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); diff --git a/arch/s390/include/asm/nmi.h 
b/arch/s390/include/asm/nmi.h index b160da8fa14b..5afee80cff58 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -99,7 +99,7 @@ int nmi_alloc_per_cpu(struct lowcore *lc); void nmi_free_per_cpu(struct lowcore *lc); void s390_handle_mcck(void); -void s390_do_machine_check(struct pt_regs *regs); +int s390_do_machine_check(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 7485ee561fec..99b92c3e46b0 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -22,12 +22,17 @@ int pci_domain_nr(struct pci_bus *); int pci_proc_domain(struct pci_bus *); #define ZPCI_BUS_NR 0 /* default bus number */ -#define ZPCI_DEVFN 0 /* default device number */ #define ZPCI_NR_DMA_SPACES 1 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS #define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16) +#ifdef PCI +#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE) +# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE +#endif +#endif /* PCI */ + /* PCI Function Controls */ #define ZPCI_FC_FN_ENABLED 0x80 #define ZPCI_FC_ERROR 0x40 @@ -94,10 +99,26 @@ struct zpci_bar_struct { struct s390_domain; +#define ZPCI_FUNCTIONS_PER_BUS 256 +struct zpci_bus { + struct kref kref; + struct pci_bus *bus; + struct zpci_dev *function[ZPCI_FUNCTIONS_PER_BUS]; + struct list_head resources; + struct list_head bus_next; + struct resource bus_resource; + int pchid; + int domain_nr; + bool multifunction; + enum pci_bus_speed max_bus_speed; +}; + /* Private data per function */ struct zpci_dev { - struct pci_bus *bus; + struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head bus_next; + struct kref kref; struct hotplug_slot hotplug_slot; enum zpci_state state; @@ -107,7 +128,12 @@ struct zpci_dev { u16 pchid; /* physical channel ID */ u8 pfgid; /* function group ID */ u8 pft; /* pci function type */ - u16 domain; + u8 port; + u8 rid_available : 1; + u8 has_hp_slot : 1; + u8 is_physfn : 1; + u8 reserved : 5; + unsigned int devfn; /* DEVFN part of the RID*/ struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -167,6 +193,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev) extern const struct attribute_group *zpci_attr_groups[]; extern unsigned int s390_pci_force_floating __initdata; +extern unsigned int s390_pci_no_rid; /* ----------------------------------------------------------------------------- Prototypes @@ -227,7 +254,14 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {} /* Helpers */ static inline struct zpci_dev *to_zpci(struct pci_dev *pdev) { - return pdev->sysdata; + struct zpci_bus *zbus = pdev->sysdata; + + return zbus->function[pdev->devfn]; +} + +static inline struct zpci_dev *to_zpci_dev(struct device *dev) +{ + return to_zpci(to_pci_dev(dev)); } struct zpci_dev *get_zdev_by_fid(u32); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index bd2cb4ea7d93..eb51272dd2cc 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -93,7 +93,10 @@ struct clp_req_query_pci { struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; u16 vfn; /* virtual fn number */ - u16 : 6; + u16 : 3; + u16 rid_avail : 1; + u16 is_physfn : 1; + u16 reserved1 : 1; u16 mio_addr_avail : 1; u16 util_str_avail : 1; /* utility string available? 
*/ u16 pfgid : 8; /* pci function group id */ @@ -102,12 +105,16 @@ struct clp_rsp_query_pci { u16 pchid; __le32 bar[PCI_STD_NUM_BARS]; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ - u32 : 16; + u16 : 12; + u16 port : 4; u8 fmb_len; u8 pft; /* pci function type */ u64 sdma; /* start dma as */ u64 edma; /* end dma as */ - u32 reserved[11]; +#define ZPCI_RID_MASK_DEVFN 0x00ff + u16 rid; /* BUS/DEVFN PCI address */ + u16 reserved0; + u32 reserved[10]; u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ u32 reserved2[16]; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6076c8c912d2..19d603bd1f36 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1229,7 +1229,6 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) #define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) #define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) @@ -1260,7 +1259,6 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) } #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) -#define pgd_offset_k(address) pgd_offset(&init_mm, address) static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { @@ -1275,6 +1273,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) return (pud_t *) p4d_deref(*p4d) + pud_index(address); return (pud_t *) p4d; } +#define pud_offset pud_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { @@ -1282,17 +1281,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return (pmd_t *) pud_deref(*pud) + pmd_index(address); return (pmd_t *) pud; } +#define pmd_offset pmd_offset -static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) +static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return (pte_t *) pmd_deref(*pmd) + pte_index(address); + return (unsigned long) pmd_deref(pmd); } -#define pte_offset_kernel(pmd, address) pte_offset(pmd, address) -#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) - -static inline void pte_unmap(pte_t *pte) { } - static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { return end <= current->mm->context.asce_limit; @@ -1560,7 +1555,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL -static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, +static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, int full) { @@ -1569,7 +1564,7 @@ static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); return pmd; } - return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH @@ -1683,6 +1678,4 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#include <asm-generic/pgtable.h> - #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 
555d148ccf32..962da04234af 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,17 +14,15 @@ #include <linux/bits.h> -#define CIF_MCCK_PENDING 0 /* machine check handling is pending */ -#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */ -#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */ -#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */ -#define CIF_FPU 4 /* restore FPU registers */ -#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */ -#define CIF_ENABLED_WAIT 6 /* in enabled wait state */ -#define CIF_MCCK_GUEST 7 /* machine check happening in guest */ -#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */ - -#define _CIF_MCCK_PENDING BIT(CIF_MCCK_PENDING) +#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */ +#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */ +#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ +#define CIF_FPU 3 /* restore FPU registers */ +#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ +#define CIF_ENABLED_WAIT 5 /* in enabled wait state */ +#define CIF_MCCK_GUEST 6 /* machine check happening in guest */ +#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ + #define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY) #define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 86a3796e9be8..e69dbf438f99 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -365,34 +365,6 @@ struct qdio_initialize { struct qdio_outbuf_state *output_sbal_state_array; }; -/** - * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() - * @l3_ipv6_addr: entry contains IPv6 address - * @l3_ipv4_addr: entry contains IPv4 address - * @l2_addr_lnid: entry contains MAC address and VLAN ID - */ -enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; - -/** - * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() - * @nit: Network interface token - * @addr: Address of one of the three types - * - * The struct is passed to the callback function by qdio_brinfo_desc() - */ -struct qdio_brinfo_entry_l3_ipv6 { - u64 nit; - struct { unsigned char _s6_addr[16]; } addr; -} __packed; -struct qdio_brinfo_entry_l3_ipv4 { - u64 nit; - struct { uint32_t _s_addr; } addr; -} __packed; -struct qdio_brinfo_entry_l2 { - u64 nit; - struct { u8 mac[6]; u16 lnid; } addr_lnid; -} __packed; - #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ #define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ #define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ @@ -423,10 +395,5 @@ extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr, extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); -extern int qdio_pnso_brinfo(struct subchannel_id schid, - int cnc, u16 *response, - void (*cb)(void *priv, enum qdio_brinfo_entry_type type, - void *entry), - void *priv); #endif /* __QDIO_H__ */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 231a51e870fe..7326f110d48c 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -58,5 +58,6 @@ extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); extern int __cpu_disable(void); +extern 
void schedule_mcck_handler(void); #endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 82703e03f35d..2204704840ea 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -6,7 +6,6 @@ #include <linux/sched.h> #include <asm/processor.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> /* * Flush all TLB entries on the local CPU. diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a470f1fa9f2a..324438889fe1 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -276,6 +276,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo } int copy_to_user_real(void __user *dest, void *src, unsigned long count); -void s390_kernel_write(void *dst, const void *src, size_t size); +void *s390_kernel_write(void *dst, const void *src, size_t size); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index 451ba7d08905..d1ecd5d722a0 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -27,6 +27,7 @@ enum ipl_pbt { IPL_PBT_FCP = 0, IPL_PBT_SCP_DATA = 1, IPL_PBT_CCW = 2, + IPL_PBT_NVME = 4, }; /* IPL Parameter Block 0 with common fields */ @@ -67,6 +68,30 @@ struct ipl_pb0_fcp { #define IPL_PB0_FCP_OPT_IPL 0x10 #define IPL_PB0_FCP_OPT_DUMP 0x20 +/* IPL Parameter Block 0 for NVMe */ +struct ipl_pb0_nvme { + __u32 len; + __u8 pbt; + __u8 reserved1[3]; + __u8 loadparm[8]; + __u8 reserved2[304]; + __u8 opt; + __u8 reserved3[3]; + __u32 fid; + __u8 reserved4[12]; + __u32 nsid; + __u8 reserved5[4]; + __u32 bootprog; + __u8 reserved6[12]; + __u64 br_lba; + __u32 scp_data_len; + __u8 reserved7[260]; + __u8 scp_data[]; +} __packed; + +#define IPL_PB0_NVME_OPT_IPL 0x10 +#define IPL_PB0_NVME_OPT_DUMP 0x20 + /* IPL Parameter Block 0 for CCW */ struct ipl_pb0_ccw { __u32 len; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 75f26d775027..a8f136943deb 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -33,11 +33,6 @@ CFLAGS_stacktrace.o += -fno-optimize-sibling-calls CFLAGS_dumpstack.o += -fno-optimize-sibling-calls CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls -# -# Pass UTS_MACHINE for user_regset definition -# -CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' - obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e80f0e6f5972..165031bd3370 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -11,9 +11,9 @@ #include <linux/kvm_host.h> #include <linux/sched.h> #include <linux/purgatory.h> +#include <linux/pgtable.h> #include <asm/idle.h> #include <asm/vdso.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/nmi.h> #include <asm/stacktrace.h> diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6d321f5f101d..636446003a06 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -867,7 +867,7 @@ static int debug_active = 1; * if debug_active is already off */ static int s390dbf_procactive(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) return proc_dointvec(table, write, 
buffer, lenp, ppos); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2c122d8bab93..0dc4b258b98d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -126,15 +126,16 @@ unknown: return -EINVAL; } -void show_stack(struct task_struct *task, unsigned long *stack) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { struct unwind_state state; - printk("Call Trace:\n"); + printk("%sCall Trace:\n", loglvl); unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) - printk(state.reliable ? " [<%016lx>] %pSR \n" : - "([<%016lx>] %pSR)\n", - state.ip, (void *) state.ip); + printk(state.reliable ? "%s [<%016lx>] %pSR \n" : + "%s([<%016lx>] %pSR)\n", + loglvl, state.ip, (void *) state.ip); debug_show_held_locks(task ? : current); } @@ -175,7 +176,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_stack(NULL, (unsigned long *) regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3ae64914bd14..50ff6dd0f995 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -55,14 +55,11 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ - _CIF_ASCE_SECONDARY | _CIF_FPU) +_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) _LPP_OFFSET = __LC_LPP -#define BASED(name) name-cleanup_critical(%r13) - .macro TRACE_IRQS_ON #ifdef CONFIG_TRACE_IRQFLAGS basr %r2,%r0 @@ -116,17 +113,39 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? jnz 2f +#if IS_ENABLED(CONFIG_KVM) lgr %r14,%r9 - cghi %r14,__LC_RETURN_LPSWE - je 0f - slg %r14,BASED(.Lcritical_start) - clg %r14,BASED(.Lcritical_length) - jhe 1f -0: + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 + jhe 0f lghi %r11,\savearea # inside critical section, do cleanup - brasl %r14,cleanup_critical - tmhh %r8,0x0001 # retest problem state after cleanup - jnz 2f + brasl %r14,.Lcleanup_sie +#endif +0: larl %r13,.Lpsw_idle_exit + cgr %r13,%r9 + jne 1f + + mvc __CLOCK_IDLE_EXIT(8,%r2), __LC_INT_CLOCK + mvc __TIMER_IDLE_EXIT(8,%r2), __LC_ASYNC_ENTER_TIMER + # account system time going idle + ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT + + lg %r13,__LC_STEAL_TIMER + alg %r13,__CLOCK_IDLE_ENTER(%r2) + slg %r13,__LC_LAST_UPDATE_CLOCK + stg %r13,__LC_STEAL_TIMER + + mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) + + lg %r13,__LC_SYSTEM_TIMER + alg %r13,__LC_LAST_UPDATE_TIMER + slg %r13,__TIMER_IDLE_ENTER(%r2) + stg %r13,__LC_SYSTEM_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) + + nihh %r8,0xfcfd # clear wait state and irq bits 1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? 
slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT @@ -152,12 +171,30 @@ _LPP_OFFSET = __LC_LPP mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer .endm - .macro REENABLE_IRQS + .macro RESTORE_SM_CLEAR_PER stg %r8,__LC_RETURN_PSW ni __LC_RETURN_PSW,0xbf ssm __LC_RETURN_PSW .endm + .macro ENABLE_INTS + stosm __SF_EMPTY(%r15),3 + .endm + + .macro ENABLE_INTS_TRACE + TRACE_IRQS_ON + ENABLE_INTS + .endm + + .macro DISABLE_INTS + stnsm __SF_EMPTY(%r15),0xfc + .endm + + .macro DISABLE_INTS_TRACE + DISABLE_INTS + TRACE_IRQS_OFF + .endm + .macro STCK savearea #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES .insn s,0xb27c0000,\savearea # store clock fast @@ -254,8 +291,6 @@ ENTRY(__switch_to) BR_EX %r14 ENDPROC(__switch_to) -.L__critical_start: - #if IS_ENABLED(CONFIG_KVM) /* * sie64a calling convention: @@ -288,7 +323,6 @@ ENTRY(sie64a) BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) -.Lsie_exit: BPOFF BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: @@ -341,7 +375,6 @@ EXPORT_SYMBOL(sie_exit) ENTRY(system_call) stpt __LC_SYNC_ENTER_TIMER -.Lsysc_stmg: stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF lg %r12,__LC_CURRENT @@ -350,7 +383,6 @@ ENTRY(system_call) .Lsysc_per: lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs -.Lsysc_vtime: UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP stmg %r0,%r7,__PT_R0(%r11) @@ -358,6 +390,7 @@ ENTRY(system_call) mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC stg %r14,__PT_FLAGS(%r11) + ENABLE_INTS .Lsysc_do_svc: # clear user controlled register to prevent speculative use xgr %r0,%r0 @@ -393,26 +426,26 @@ ENTRY(system_call) jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work - TSTMSK __LC_CPU_FLAGS,_CIF_WORK + TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU) jnz .Lsysc_work BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP .Lsysc_restore: + DISABLE_INTS + TSTMSK __LC_CPU_FLAGS, _CIF_FPU + jz .Lsysc_skip_fpu + brasl %r14,load_fpu_regs +.Lsysc_skip_fpu: lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) -.Lsysc_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER - lmg %r11,%r15,__PT_R11(%r11) - b __LC_RETURN_LPSWE(%r0) -.Lsysc_done: + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE # # One of the work bits is on. Find out which one. # .Lsysc_work: - TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING - jo .Lsysc_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART @@ -436,11 +469,9 @@ ENTRY(system_call) jo .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - jo .Lsysc_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) jnz .Lsysc_asce - j .Lsysc_return # beware of critical section cleanup + j .Lsysc_return # # _TIF_NEED_RESCHED is set, call schedule @@ -450,13 +481,6 @@ ENTRY(system_call) jg schedule # -# _CIF_MCCK_PENDING is set, call handler -# -.Lsysc_mcck_pending: - larl %r14,.Lsysc_return - jg s390_handle_mcck # TIF bit will be cleared by handler - -# # _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce # .Lsysc_asce: @@ -475,12 +499,6 @@ ENTRY(system_call) larl %r14,.Lsysc_return jg set_fs_fixup -# -# CIF_FPU is set, restore floating-point controls and floating-point registers. 
-# -.Lsysc_vxrs: - larl %r14,.Lsysc_return - jg load_fpu_regs # # _TIF_SIGPENDING is set, call do_signal @@ -564,7 +582,6 @@ ENTRY(system_call) jnh .Lsysc_tracenogo sllg %r8,%r2,3 lg %r9,0(%r8,%r10) -.Lsysc_tracego: lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) @@ -585,8 +602,6 @@ ENTRY(ret_from_fork) la %r11,STACK_FRAME_OVERHEAD(%r15) lg %r12,__LC_CURRENT brasl %r14,schedule_tail - TRACE_IRQS_ON - ssm __LC_SVC_NEW_PSW # reenable interrupts tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ? jne .Lsysc_tracenogo # it's a kernel thread @@ -620,15 +635,16 @@ ENTRY(pgm_check_handler) lghi %r10,1 0: lg %r12,__LC_CURRENT lghi %r11,0 - larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit jnz 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 - slg %r14,BASED(.Lsie_critical_start) - clg %r14,BASED(.Lsie_critical_length) + larl %r13,.Lsie_gmap + slgr %r14,%r13 + lghi %r13,.Lsie_done - .Lsie_gmap + clgr %r14,%r13 jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE @@ -680,7 +696,7 @@ ENTRY(pgm_check_handler) mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -6: REENABLE_IRQS +6: RESTORE_SM_CLEAR_PER xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -702,7 +718,7 @@ ENTRY(pgm_check_handler) # PER event in supervisor state, must be kprobes # .Lpgm_kprobe: - REENABLE_IRQS + RESTORE_SM_CLEAR_PER xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_per_trap @@ -713,11 +729,10 @@ ENTRY(pgm_check_handler) # .Lpgm_svcper: mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW - lghi %r13,__TASK_thread larl %r14,.Lsysc_per stg %r14,__LC_RETURN_PSW+8 lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP - lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs + lpswe __LC_RETURN_PSW # branch to .Lsysc_per ENDPROC(pgm_check_handler) /* @@ -729,7 +744,6 @@ ENTRY(io_int_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - larl %r13,cleanup_critical lmg %r8,%r9,__LC_IO_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) @@ -749,7 +763,12 @@ ENTRY(io_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore +#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) + tmhh %r8,0x300 + jz 1f TRACE_IRQS_OFF +1: +#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs @@ -767,25 +786,27 @@ ENTRY(io_int_handler) j .Lio_loop .Lio_return: LOCKDEP_SYS_EXIT - TRACE_IRQS_ON -.Lio_tif: TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lio_work # there is work to do (signals etc.) TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: +#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) + tm __PT_PSW(%r11),3 + jno 0f + TRACE_IRQS_ON +0: +#endif lg %r14,__LC_VDSO_PER_CPU - lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? 
jno .Lio_exit_kernel BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP -.Lio_exit_timer: stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: - lmg %r11,%r15,__PT_R11(%r11) - b __LC_RETURN_LPSWE(%r0) + lmg %r0,%r15,__PT_R0(%r11) + b __LC_RETURN_LPSWE .Lio_done: # @@ -813,9 +834,6 @@ ENTRY(io_int_handler) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - # TRACE_IRQS_ON already done at .Lio_return, call - # TRACE_IRQS_OFF to keep things symmetrical - TRACE_IRQS_OFF brasl %r14,preempt_schedule_irq j .Lio_return #else @@ -835,9 +853,6 @@ ENTRY(io_int_handler) # # One of the work bits is on. Find out which one. # -.Lio_work_tif: - TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING - jo .Lio_mcck_pending TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lio_reschedule #ifdef CONFIG_LIVEPATCH @@ -854,15 +869,6 @@ ENTRY(io_int_handler) jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) jnz .Lio_asce - j .Lio_return # beware of critical section cleanup - -# -# _CIF_MCCK_PENDING is set, call handler -# -.Lio_mcck_pending: - # TRACE_IRQS_ON already done at .Lio_return - brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler - TRACE_IRQS_OFF j .Lio_return # @@ -895,23 +901,19 @@ ENTRY(io_int_handler) # _TIF_GUARDED_STORAGE is set, call guarded_storage_load # .Lio_guarded_storage: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,gs_load_bc_cb - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return # # _TIF_NEED_RESCHED is set, call schedule # .Lio_reschedule: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE brasl %r14,schedule # call scheduler - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return # @@ -928,24 +930,20 @@ ENTRY(io_int_handler) # _TIF_SIGPENDING or is set, call do_signal # .Lio_sigpending: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_signal - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return # # _TIF_NOTIFY_RESUME or is set, call do_notify_resume # .Lio_notify_resume: - # TRACE_IRQS_ON already done at .Lio_return - ssm __LC_SVC_NEW_PSW # reenable interrupts + ENABLE_INTS_TRACE lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_notify_resume - ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts - TRACE_IRQS_OFF + DISABLE_INTS_TRACE j .Lio_return ENDPROC(io_int_handler) @@ -958,7 +956,6 @@ ENTRY(ext_int_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r12,__LC_CURRENT - larl %r13,cleanup_critical lmg %r8,%r9,__LC_EXT_OLD_PSW SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER stmg %r0,%r7,__PT_R0(%r11) @@ -981,7 +978,12 @@ ENTRY(ext_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore +#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) + tmhh %r8,0x300 + jz 1f TRACE_IRQS_OFF +1: +#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT @@ -990,11 +992,11 @@ ENTRY(ext_int_handler) ENDPROC(ext_int_handler) /* - * Load idle PSW. The second "half" of this function is in .Lcleanup_idle. + * Load idle PSW. 
*/ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) - larl %r1,.Lpsw_idle_lpsw+4 + larl %r1,.Lpsw_idle_exit stg %r1,__SF_EMPTY+8(%r15) larl %r1,smp_cpu_mtid llgf %r1,0(%r1) @@ -1006,10 +1008,9 @@ ENTRY(psw_idle) BPON STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) -.Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) +.Lpsw_idle_exit: BR_EX %r14 -.Lpsw_idle_end: ENDPROC(psw_idle) /* @@ -1020,6 +1021,7 @@ ENDPROC(psw_idle) * of the register contents at return from io or a system call. */ ENTRY(save_fpu_regs) + stnsm __SF_EMPTY(%r15),0xfc lg %r2,__LC_CURRENT aghi %r2,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU @@ -1051,6 +1053,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU .Lsave_fpu_regs_exit: + ssm __SF_EMPTY(%r15) BR_EX %r14 .Lsave_fpu_regs_end: ENDPROC(save_fpu_regs) @@ -1102,8 +1105,6 @@ load_fpu_regs: .Lload_fpu_regs_end: ENDPROC(load_fpu_regs) -.L__critical_end: - /* * Machine check handler routines */ @@ -1116,7 +1117,6 @@ ENTRY(mcck_int_handler) lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs lg %r12,__LC_CURRENT - larl %r13,cleanup_critical lmg %r8,%r9,__LC_MCK_OLD_PSW TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid @@ -1202,15 +1202,13 @@ ENTRY(mcck_int_handler) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,s390_do_machine_check - tm __PT_PSW+1(%r11),0x01 # returning to user ? - jno .Lmcck_return + cghi %r2,0 + je .Lmcck_return lg %r1,__LC_KERNEL_STACK # switch to kernel stack mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 - TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING - jno .Lmcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON @@ -1280,265 +1278,23 @@ ENTRY(stack_overflow) ENDPROC(stack_overflow) #endif -ENTRY(cleanup_critical) - cghi %r9,__LC_RETURN_LPSWE - je .Lcleanup_lpswe -#if IS_ENABLED(CONFIG_KVM) - clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap - jl 0f - clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done - jl .Lcleanup_sie -#endif - clg %r9,BASED(.Lcleanup_table) # system_call - jl 0f - clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc - jl .Lcleanup_system_call - clg %r9,BASED(.Lcleanup_table+16) # .Lsysc_tif - jl 0f - clg %r9,BASED(.Lcleanup_table+24) # .Lsysc_restore - jl .Lcleanup_sysc_tif - clg %r9,BASED(.Lcleanup_table+32) # .Lsysc_done - jl .Lcleanup_sysc_restore - clg %r9,BASED(.Lcleanup_table+40) # .Lio_tif - jl 0f - clg %r9,BASED(.Lcleanup_table+48) # .Lio_restore - jl .Lcleanup_io_tif - clg %r9,BASED(.Lcleanup_table+56) # .Lio_done - jl .Lcleanup_io_restore - clg %r9,BASED(.Lcleanup_table+64) # psw_idle - jl 0f - clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end - jl .Lcleanup_idle - clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs - jl 0f - clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end - jl .Lcleanup_save_fpu_regs - clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs - jl 0f - clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end - jl .Lcleanup_load_fpu_regs -0: BR_EX %r14,%r11 -ENDPROC(cleanup_critical) - - .align 8 -.Lcleanup_table: - .quad system_call - .quad .Lsysc_do_svc - .quad .Lsysc_tif - .quad .Lsysc_restore - .quad .Lsysc_done - .quad .Lio_tif - .quad .Lio_restore - .quad .Lio_done - .quad psw_idle - .quad .Lpsw_idle_end - .quad save_fpu_regs - .quad .Lsave_fpu_regs_end - .quad load_fpu_regs - .quad .Lload_fpu_regs_end - #if IS_ENABLED(CONFIG_KVM) 
-.Lcleanup_table_sie: - .quad .Lsie_gmap - .quad .Lsie_done - .Lcleanup_sie: - cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? - je 1f - slg %r9,BASED(.Lsie_crit_mcck_start) - clg %r9,BASED(.Lsie_crit_mcck_length) - jh 1f - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST -1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) + cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt? + je 1f + larl %r13,.Lsie_entry + slgr %r9,%r13 + larl %r13,.Lsie_skip + clgr %r9,%r13 + jh 1f + oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST +1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit BR_EX %r14,%r11 -#endif -.Lcleanup_system_call: - # check if stpt has been executed - clg %r9,BASED(.Lcleanup_system_call_insn) - jh 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER -0: # check if stmg has been executed - clg %r9,BASED(.Lcleanup_system_call_insn+8) - jh 0f - mvc __LC_SAVE_AREA_SYNC(64),0(%r11) -0: # check if base register setup + TIF bit load has been done - clg %r9,BASED(.Lcleanup_system_call_insn+16) - jhe 0f - # set up saved register r12 task struct pointer - stg %r12,32(%r11) - # set up saved register r13 __TASK_thread offset - mvc 40(8,%r11),BASED(.Lcleanup_system_call_const) -0: # check if the user time update has been done - clg %r9,BASED(.Lcleanup_system_call_insn+24) - jh 0f - lg %r15,__LC_EXIT_TIMER - slg %r15,__LC_SYNC_ENTER_TIMER - alg %r15,__LC_USER_TIMER - stg %r15,__LC_USER_TIMER -0: # check if the system time update has been done - clg %r9,BASED(.Lcleanup_system_call_insn+32) - jh 0f - lg %r15,__LC_LAST_UPDATE_TIMER - slg %r15,__LC_EXIT_TIMER - alg %r15,__LC_SYSTEM_TIMER - stg %r15,__LC_SYSTEM_TIMER -0: # update accounting time stamp - mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP - # set up saved register r11 - lg %r15,__LC_KERNEL_STACK - la %r9,STACK_FRAME_OVERHEAD(%r15) - stg %r9,24(%r11) # r11 pt_regs pointer - # fill pt_regs - mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC - stmg %r0,%r7,__PT_R0(%r9) - mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW - mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC - xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9) - mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL - # setup saved register r15 - stg %r15,56(%r11) # r15 stack pointer - # set new psw address and exit - larl %r9,.Lsysc_do_svc - BR_EX %r14,%r11 -.Lcleanup_system_call_insn: - .quad system_call - .quad .Lsysc_stmg - .quad .Lsysc_per - .quad .Lsysc_vtime+36 - .quad .Lsysc_vtime+42 -.Lcleanup_system_call_const: - .quad __TASK_thread - -.Lcleanup_sysc_tif: - larl %r9,.Lsysc_tif - BR_EX %r14,%r11 - -.Lcleanup_sysc_restore: - # check if stpt has been executed - clg %r9,BASED(.Lcleanup_sysc_restore_insn) - jh 0f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8) - je 1f - lg %r9,24(%r11) # get saved pointer to pt_regs - mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - mvc 0(64,%r11),__PT_R8(%r9) - lmg %r0,%r7,__PT_R0(%r9) -.Lcleanup_lpswe: -1: lmg %r8,%r9,__LC_RETURN_PSW - BR_EX %r14,%r11 -.Lcleanup_sysc_restore_insn: - .quad .Lsysc_exit_timer - .quad .Lsysc_done - 4 - -.Lcleanup_io_tif: - larl %r9,.Lio_tif - BR_EX %r14,%r11 - 
-.Lcleanup_io_restore: - # check if stpt has been executed - clg %r9,BASED(.Lcleanup_io_restore_insn) - jh 0f - mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER -0: clg %r9,BASED(.Lcleanup_io_restore_insn+8) - je 1f - lg %r9,24(%r11) # get saved r11 pointer to pt_regs - mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) - mvc 0(64,%r11),__PT_R8(%r9) - lmg %r0,%r7,__PT_R0(%r9) -1: lmg %r8,%r9,__LC_RETURN_PSW - BR_EX %r14,%r11 -.Lcleanup_io_restore_insn: - .quad .Lio_exit_timer - .quad .Lio_done - 4 - -.Lcleanup_idle: - ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT - # copy interrupt clock & cpu timer - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER - cghi %r11,__LC_SAVE_AREA_ASYNC - je 0f - mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK - mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER -0: # check if stck & stpt have been executed - clg %r9,BASED(.Lcleanup_idle_insn) - jhe 1f - mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) - mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) -1: # calculate idle cycles - clg %r9,BASED(.Lcleanup_idle_insn) - jl 3f - larl %r1,smp_cpu_mtid - llgf %r1,0(%r1) - ltgr %r1,%r1 - jz 3f - .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) - larl %r3,mt_cycles - ag %r3,__LC_PERCPU_OFFSET - la %r4,__SF_EMPTY+16(%r15) -2: lg %r0,0(%r3) - slg %r0,0(%r4) - alg %r0,64(%r4) - stg %r0,0(%r3) - la %r3,8(%r3) - la %r4,8(%r4) - brct %r1,2b -3: # account system time going idle - lg %r9,__LC_STEAL_TIMER - alg %r9,__CLOCK_IDLE_ENTER(%r2) - slg %r9,__LC_LAST_UPDATE_CLOCK - stg %r9,__LC_STEAL_TIMER - mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2) - lg %r9,__LC_SYSTEM_TIMER - alg %r9,__LC_LAST_UPDATE_TIMER - slg %r9,__TIMER_IDLE_ENTER(%r2) - stg %r9,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) - # prepare return psw - nihh %r8,0xfcfd # clear irq & wait state bits - lg %r9,48(%r11) # return from psw_idle - BR_EX %r14,%r11 -.Lcleanup_idle_insn: - .quad .Lpsw_idle_lpsw - -.Lcleanup_save_fpu_regs: - larl %r9,save_fpu_regs - BR_EX %r14,%r11 - -.Lcleanup_load_fpu_regs: - larl %r9,load_fpu_regs - BR_EX %r14,%r11 - -/* - * Integer constants - */ - .align 8 -.Lcritical_start: - .quad .L__critical_start -.Lcritical_length: - .quad .L__critical_end - .L__critical_start -#if IS_ENABLED(CONFIG_KVM) -.Lsie_critical_start: - .quad .Lsie_gmap -.Lsie_critical_length: - .quad .Lsie_done - .Lsie_gmap -.Lsie_crit_mcck_start: - .quad .Lsie_entry -.Lsie_crit_mcck_length: - .quad .Lsie_skip - .Lsie_entry #endif .section .rodata, "a" #define SYSCALL(esame,emu) .quad __s390x_ ## esame diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 4cd9b1ada834..44e01dd1e624 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -72,22 +72,6 @@ static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) #endif } -static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) -{ -#ifdef CONFIG_KPROBES - insn->opc = BREAKPOINT_INSTRUCTION; - insn->disp = KPROBE_ON_FTRACE_NOP; -#endif -} - -static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) -{ -#ifdef CONFIG_KPROBES - insn->opc = BREAKPOINT_INSTRUCTION; - insn->disp = KPROBE_ON_FTRACE_CALL; -#endif -} - int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 8f8456816d83..0d7fbdfe995a 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -24,19 +24,19 @@ void enabled_wait(void) { struct 
s390_idle_data *idle = this_cpu_ptr(&s390_idle); unsigned long long idle_time; - unsigned long psw_mask; + unsigned long psw_mask, flags; - trace_hardirqs_on(); /* Wait for external, I/O or machine check interrupt. */ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); + local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); + local_irq_restore(flags); - trace_hardirqs_off(); /* Account time spent with enabled wait psw loaded as idle time. */ write_seqcount_begin(&idle->seqcount); @@ -118,22 +118,16 @@ u64 arch_cpu_idle_time(int cpu) void arch_cpu_idle_enter(void) { - local_mcck_disable(); } void arch_cpu_idle(void) { - if (!test_cpu_flag(CIF_MCCK_PENDING)) - /* Halt the cpu and keep track of cpu time accounting. */ - enabled_wait(); + enabled_wait(); local_irq_enable(); } void arch_cpu_idle_exit(void) { - local_mcck_enable(); - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); } void arch_cpu_idle_dead(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 4a71061974fd..ccea9a245867 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -39,6 +39,7 @@ #define IPL_CCW_STR "ccw" #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NVME_STR "nvme" #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" @@ -93,6 +94,8 @@ static char *ipl_type_str(enum ipl_type type) return IPL_FCP_DUMP_STR; case IPL_TYPE_NSS: return IPL_NSS_STR; + case IPL_TYPE_NVME: + return IPL_NVME_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -133,6 +136,7 @@ static int reipl_capabilities = IPL_TYPE_UNKNOWN; static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_nvme; static struct ipl_parameter_block *reipl_block_ccw; static struct ipl_parameter_block *reipl_block_nss; static struct ipl_parameter_block *reipl_block_actual; @@ -261,6 +265,8 @@ static __init enum ipl_type get_ipl_type(void) return IPL_TYPE_FCP_DUMP; else return IPL_TYPE_FCP; + case IPL_PBT_NVME: + return IPL_TYPE_NVME; } return IPL_TYPE_UNKNOWN; } @@ -317,6 +323,8 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); + case IPL_TYPE_NVME: + return sprintf(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; } @@ -345,15 +353,35 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, scp_data, size); } + +static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.nvme.scp_data_len; + void *scp_data = &ipl_block.nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + static struct bin_attribute ipl_scp_data_attr = __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE); +static struct bin_attribute ipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE); + static struct bin_attribute *ipl_fcp_bin_attrs[] = { &ipl_parameter_attr, &ipl_scp_data_attr, NULL, }; +static struct bin_attribute *ipl_nvme_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_nvme_scp_data_attr, + NULL, +}; + /* FCP ipl device attributes */ DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", @@ -365,6 +393,16 @@ 
DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)ipl_block.fcp.br_lba); +/* NVMe ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.fid); +DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.nsid); +DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n", + (unsigned long long)ipl_block.nvme.bootprog); +DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n", + (unsigned long long)ipl_block.nvme.br_lba); + static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -399,6 +437,24 @@ static struct attribute_group ipl_fcp_attr_group = { .bin_attrs = ipl_fcp_bin_attrs, }; +static struct attribute *ipl_nvme_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_nvme_fid_attr.attr, + &sys_ipl_nvme_nsid_attr.attr, + &sys_ipl_nvme_bootprog_attr.attr, + &sys_ipl_nvme_br_lba_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nvme_attr_group = { + .attrs = ipl_nvme_attrs, + .bin_attrs = ipl_nvme_bin_attrs, +}; + + /* CCW ipl device attributes */ static struct attribute *ipl_ccw_attrs_vm[] = { @@ -474,6 +530,9 @@ static int __init ipl_init(void) case IPL_TYPE_FCP_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; + case IPL_TYPE_NVME: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); + break; default: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_unknown_attr_group); @@ -727,6 +786,93 @@ static struct attribute_group reipl_fcp_attr_group = { static struct kobj_attribute sys_reipl_fcp_clear_attr = __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store); +/* NVME reipl device attributes */ + +static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_nvme->nvme.scp_data_len; + void *scp_data = reipl_block_nvme->nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + if (off) + return -EINVAL; + + memcpy(reipl_block_nvme->nvme.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_nvme->nvme.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.scp_data_len = scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read, + reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_nvme_bin_attrs[] = { + &sys_reipl_nvme_scp_data_attr, + NULL, +}; + +DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.br_lba); + +/* nvme wrapper */ +static ssize_t 
reipl_nvme_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_nvme, page); +} + +static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_nvme, buf, len); +} + +static struct kobj_attribute sys_reipl_nvme_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show, + reipl_nvme_loadparm_store); + +static struct attribute *reipl_nvme_attrs[] = { + &sys_reipl_nvme_fid_attr.attr, + &sys_reipl_nvme_nsid_attr.attr, + &sys_reipl_nvme_bootprog_attr.attr, + &sys_reipl_nvme_br_lba_attr.attr, + &sys_reipl_nvme_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nvme_attr_group = { + .attrs = reipl_nvme_attrs, + .bin_attrs = reipl_nvme_bin_attrs +}; + /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); @@ -891,6 +1037,9 @@ static int reipl_set_type(enum ipl_type type) case IPL_TYPE_FCP: reipl_block_actual = reipl_block_fcp; break; + case IPL_TYPE_NVME: + reipl_block_actual = reipl_block_nvme; + break; case IPL_TYPE_NSS: reipl_block_actual = reipl_block_nss; break; @@ -917,6 +1066,8 @@ static ssize_t reipl_type_store(struct kobject *kobj, rc = reipl_set_type(IPL_TYPE_CCW); else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NVME); else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) rc = reipl_set_type(IPL_TYPE_NSS); return (rc != 0) ? rc : len; @@ -927,6 +1078,7 @@ static struct kobj_attribute reipl_type_attr = static struct kset *reipl_kset; static struct kset *reipl_fcp_kset; +static struct kset *reipl_nvme_kset; static void __reipl_run(void *unused) { @@ -945,6 +1097,10 @@ static void __reipl_run(void *unused) else diag308(DIAG308_LOAD_NORMAL, NULL); break; + case IPL_TYPE_NVME: + diag308(DIAG308_SET, reipl_block_nvme); + diag308(DIAG308_LOAD_CLEAR, NULL); + break; case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); diag308(DIAG308_LOAD_CLEAR, NULL); @@ -1093,6 +1249,49 @@ out1: return rc; } +static int __init reipl_nvme_init(void) +{ + int rc; + + reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nvme) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL, + &reipl_kset->kobj); + if (!reipl_nvme_kset) { + free_page((unsigned long) reipl_block_nvme); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); + if (rc) { + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; + } + + if (ipl_info.type == IPL_TYPE_NVME) { + memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the IPL parameter block, so take it + * always from sclp_ipl_info. 
+ */ + memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN; + reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + reipl_block_nvme->nvme.pbt = IPL_PBT_NVME; + reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_NVME; + return 0; +} + static int __init reipl_type_init(void) { enum ipl_type reipl_type = ipl_info.type; @@ -1108,6 +1307,9 @@ static int __init reipl_type_init(void) if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) { memcpy(reipl_block_fcp, reipl_block, size); reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) { + memcpy(reipl_block_nvme, reipl_block, size); + reipl_type = IPL_TYPE_NVME; } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { memcpy(reipl_block_ccw, reipl_block, size); reipl_type = IPL_TYPE_CCW; @@ -1134,6 +1336,9 @@ static int __init reipl_init(void) rc = reipl_fcp_init(); if (rc) return rc; + rc = reipl_nvme_init(); + if (rc) + return rc; rc = reipl_nss_init(); if (rc) return rc; @@ -1750,6 +1955,10 @@ void __init setup_ipl(void) ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn; ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; + case IPL_TYPE_NVME: + ipl_info.data.nvme.fid = ipl_block.nvme.fid; + ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; + break; case IPL_TYPE_NSS: case IPL_TYPE_UNKNOWN: /* We have no info to copy */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3a854cb5a4c6..93c6b8932fbd 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -16,7 +16,6 @@ #include <linux/debug_locks.h> #include <asm/cio.h> #include <asm/setup.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/smp.h> #include <asm/ipl.h> diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index ba8f19bb438b..4055f1c49814 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -19,6 +19,7 @@ #include <linux/kasan.h> #include <linux/moduleloader.h> #include <linux/bug.h> +#include <linux/memory.h> #include <asm/alternative.h> #include <asm/nospec-branch.h> #include <asm/facility.h> @@ -174,10 +175,12 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, } static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, - int sign, int bits, int shift) + int sign, int bits, int shift, + void *(*write)(void *dest, const void *src, size_t len)) { unsigned long umax; long min, max; + void *dest = (void *)loc; if (val & ((1UL << shift) - 1)) return -ENOEXEC; @@ -194,26 +197,33 @@ static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, return -ENOEXEC; } - if (bits == 8) - *(unsigned char *) loc = val; - else if (bits == 12) - *(unsigned short *) loc = (val & 0xfff) | + if (bits == 8) { + unsigned char tmp = val; + write(dest, &tmp, 1); + } else if (bits == 12) { + unsigned short tmp = (val & 0xfff) | (*(unsigned short *) loc & 0xf000); - else if (bits == 16) - *(unsigned short *) loc = val; - else if (bits == 20) - *(unsigned int *) loc = (val & 0xfff) << 16 | - (val & 0xff000) >> 4 | - (*(unsigned int *) loc & 0xf00000ff); - else if (bits == 32) - *(unsigned int *) loc = val; - else if (bits == 64) - *(unsigned long *) loc = val; + write(dest, &tmp, 2); + } else if (bits == 16) { + unsigned short tmp = val; + write(dest, &tmp, 2); + } else if (bits == 20) { + unsigned int tmp = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff); + write(dest, 
&tmp, 4); + } else if (bits == 32) { + unsigned int tmp = val; + write(dest, &tmp, 4); + } else if (bits == 64) { + unsigned long tmp = val; + write(dest, &tmp, 8); + } return 0; } static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, - const char *strtab, struct module *me) + const char *strtab, struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) { struct mod_arch_syminfo *info; Elf_Addr loc, val; @@ -241,17 +251,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_64: /* Direct 64 bit. */ val += rela->r_addend; if (r_type == R_390_8) - rc = apply_rela_bits(loc, val, 0, 8, 0); + rc = apply_rela_bits(loc, val, 0, 8, 0, write); else if (r_type == R_390_12) - rc = apply_rela_bits(loc, val, 0, 12, 0); + rc = apply_rela_bits(loc, val, 0, 12, 0, write); else if (r_type == R_390_16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_20) - rc = apply_rela_bits(loc, val, 1, 20, 0); + rc = apply_rela_bits(loc, val, 1, 20, 0, write); else if (r_type == R_390_32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_PC16: /* PC relative 16 bit. */ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ @@ -260,15 +270,15 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PC64: /* PC relative 64 bit. */ val += rela->r_addend - loc; if (r_type == R_390_PC16) - rc = apply_rela_bits(loc, val, 1, 16, 0); + rc = apply_rela_bits(loc, val, 1, 16, 0, write); else if (r_type == R_390_PC16DBL) - rc = apply_rela_bits(loc, val, 1, 16, 1); + rc = apply_rela_bits(loc, val, 1, 16, 1, write); else if (r_type == R_390_PC32DBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); else if (r_type == R_390_PC32) - rc = apply_rela_bits(loc, val, 1, 32, 0); + rc = apply_rela_bits(loc, val, 1, 32, 0, write); else if (r_type == R_390_PC64) - rc = apply_rela_bits(loc, val, 1, 64, 0); + rc = apply_rela_bits(loc, val, 1, 64, 0, write); break; case R_390_GOT12: /* 12 bit GOT offset. */ case R_390_GOT16: /* 16 bit GOT offset. */ @@ -283,33 +293,33 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. 
*/ if (info->got_initialized == 0) { - Elf_Addr *gotent; + Elf_Addr *gotent = me->core_layout.base + + me->arch.got_offset + + info->got_offset; - gotent = me->core_layout.base + me->arch.got_offset + - info->got_offset; - *gotent = val; + write(gotent, &val, sizeof(*gotent)); info->got_initialized = 1; } val = info->got_offset + rela->r_addend; if (r_type == R_390_GOT12 || r_type == R_390_GOTPLT12) - rc = apply_rela_bits(loc, val, 0, 12, 0); + rc = apply_rela_bits(loc, val, 0, 12, 0, write); else if (r_type == R_390_GOT16 || r_type == R_390_GOTPLT16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_GOT20 || r_type == R_390_GOTPLT20) - rc = apply_rela_bits(loc, val, 1, 20, 0); + rc = apply_rela_bits(loc, val, 1, 20, 0, write); else if (r_type == R_390_GOT32 || r_type == R_390_GOTPLT32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) { val += (Elf_Addr) me->core_layout.base - loc; - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); } break; case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ @@ -320,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int *ip; - ip = me->core_layout.base + me->arch.plt_offset + - info->plt_offset; - ip[0] = 0x0d10e310; /* basr 1,0 */ - ip[1] = 0x100a0004; /* lg 1,10(1) */ + unsigned int insn[5]; + unsigned int *ip = me->core_layout.base + + me->arch.plt_offset + + info->plt_offset; + + insn[0] = 0x0d10e310; /* basr 1,0 */ + insn[1] = 0x100a0004; /* lg 1,10(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { unsigned int *ij; ij = me->core_layout.base + me->arch.plt_offset + me->arch.plt_size - PLT_ENTRY_SIZE; - ip[2] = 0xa7f40000 + /* j __jump_r1 */ + insn[2] = 0xa7f40000 + /* j __jump_r1 */ (unsigned int)(u16) (((unsigned long) ij - 8 - (unsigned long) ip) / 2); } else { - ip[2] = 0x07f10000; /* br %r1 */ + insn[2] = 0x07f10000; /* br %r1 */ } - ip[3] = (unsigned int) (val >> 32); - ip[4] = (unsigned int) val; + insn[3] = (unsigned int) (val >> 32); + insn[4] = (unsigned int) val; + + write(ip, insn, sizeof(insn)); info->plt_initialized = 1; } if (r_type == R_390_PLTOFF16 || @@ -357,17 +371,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val += rela->r_addend - loc; } if (r_type == R_390_PLT16DBL) - rc = apply_rela_bits(loc, val, 1, 16, 1); + rc = apply_rela_bits(loc, val, 1, 16, 1, write); else if (r_type == R_390_PLTOFF16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_PLT32DBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); else if (r_type == R_390_PLT32 || r_type == R_390_PLTOFF32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_PLT64 || r_type == R_390_PLTOFF64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_GOTOFF16: /* 16 bit offset to GOT. */ case R_390_GOTOFF32: /* 32 bit offset to GOT. 
*/ @@ -375,20 +389,20 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val = val + rela->r_addend - ((Elf_Addr) me->core_layout.base + me->arch.got_offset); if (r_type == R_390_GOTOFF16) - rc = apply_rela_bits(loc, val, 0, 16, 0); + rc = apply_rela_bits(loc, val, 0, 16, 0, write); else if (r_type == R_390_GOTOFF32) - rc = apply_rela_bits(loc, val, 0, 32, 0); + rc = apply_rela_bits(loc, val, 0, 32, 0, write); else if (r_type == R_390_GOTOFF64) - rc = apply_rela_bits(loc, val, 0, 64, 0); + rc = apply_rela_bits(loc, val, 0, 64, 0, write); break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ val = (Elf_Addr) me->core_layout.base + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) - rc = apply_rela_bits(loc, val, 1, 32, 0); + rc = apply_rela_bits(loc, val, 1, 32, 0, write); else if (r_type == R_390_GOTPCDBL) - rc = apply_rela_bits(loc, val, 1, 32, 1); + rc = apply_rela_bits(loc, val, 1, 32, 1, write); break; case R_390_COPY: case R_390_GLOB_DAT: /* Create GOT entry. */ @@ -412,9 +426,10 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, return 0; } -int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, +static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relsec, - struct module *me) + struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) { Elf_Addr base; Elf_Sym *symtab; @@ -430,13 +445,27 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); for (i = 0; i < n; i++, rela++) { - rc = apply_rela(rela, base, symtab, strtab, me); + rc = apply_rela(rela, base, symtab, strtab, me, write); if (rc) return rc; } return 0; } +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + bool early = me->state == MODULE_STATE_UNFORMED; + void *(*write)(void *, const void *, size_t) = memcpy; + + if (!early) + write = s390_kernel_write; + + return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me, + write); +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0a487fae763e..86c8d5370e7f 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -148,7 +148,6 @@ void s390_handle_mcck(void) local_mcck_disable(); mcck = *this_cpu_ptr(&cpu_mcck); memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); - clear_cpu_flag(CIF_MCCK_PENDING); local_mcck_enable(); local_irq_restore(flags); @@ -333,7 +332,7 @@ NOKPROBE_SYMBOL(s390_backup_mcck_info); /* * machine check handler. 
*/ -void notrace s390_do_machine_check(struct pt_regs *regs) +int notrace s390_do_machine_check(struct pt_regs *regs) { static int ipd_count; static DEFINE_SPINLOCK(ipd_lock); @@ -342,6 +341,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) unsigned long long tmp; union mci mci; unsigned long mcck_dam_code; + int mcck_pending = 0; nmi_enter(); inc_irq_stat(NMI_NMI); @@ -400,7 +400,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) */ mcck->kill_task = 1; mcck->mcck_code = mci.val; - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } /* @@ -420,8 +420,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) mcck->stp_queue |= stp_island_check(); - if (mcck->stp_queue) - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } /* @@ -442,12 +441,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs) if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } if (mci.w) { /* Warning pending */ mcck->warning = 1; - set_cpu_flag(CIF_MCCK_PENDING); + mcck_pending = 1; } /* @@ -462,7 +461,17 @@ void notrace s390_do_machine_check(struct pt_regs *regs) *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; } clear_cpu_flag(CIF_MCCK_GUEST); + + if (user_mode(regs) && mcck_pending) { + nmi_exit(); + return 1; + } + + if (mcck_pending) + schedule_mcck_handler(); + nmi_exit(); + return 0; } NOKPROBE_SYMBOL(s390_do_machine_check); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 58faa12542a1..ce60a459a143 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -25,7 +25,6 @@ #include <linux/compat.h> #include <trace/syscall.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -1416,7 +1415,7 @@ static const struct user_regset s390_regsets[] = { }; static const struct user_regset_view user_s390_view = { - .name = UTS_MACHINE, + .name = "s390x", .e_machine = EM_S390, .regsets = s390_regsets, .n = ARRAY_SIZE(s390_regsets) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 36445dd40fdb..5853c9872dfe 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,12 +305,9 @@ void *restart_stack __section(.data); unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long) - __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE, + THREADINFO_GFP, NUMA_NO_NODE, + __builtin_return_address(0)); #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif @@ -384,8 +381,7 @@ static void __init setup_lowcore_dat_off(void) lc->restart_psw.addr = (unsigned long) restart_int_handler; lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; @@ -597,9 +593,10 @@ static void __init setup_memory_end(void) #ifdef CONFIG_CRASH_DUMP /* - * When kdump is enabled, we have to ensure that no 
memory from - * the area [0 - crashkernel memory size] and - * [crashk_res.start - crashk_res.end] is set offline. + * When kdump is enabled, we have to ensure that no memory from the area + * [0 - crashkernel memory size] is set offline - it will be exchanged with + * the crashkernel memory region when kdump is triggered. The crashkernel + * memory region can never get offlined (pages are unmovable). */ static int kdump_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) @@ -610,11 +607,7 @@ static int kdump_mem_notifier(struct notifier_block *nb, return NOTIFY_OK; if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) return NOTIFY_BAD; - if (arg->start_pfn > PFN_DOWN(crashk_res.end)) - return NOTIFY_OK; - if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) - return NOTIFY_OK; - return NOTIFY_BAD; + return NOTIFY_OK; } static struct notifier_block kdump_mem_nb = { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 10dbb12eb14d..e6be63ff162a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -61,6 +61,7 @@ enum { ec_schedule = 0, ec_call_function_single, ec_stop_cpu, + ec_mcck_pending, }; enum { @@ -403,6 +404,11 @@ int smp_find_processor_id(u16 address) return -1; } +void schedule_mcck_handler(void) +{ + pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); +} + bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) @@ -497,6 +503,8 @@ static void smp_handle_ext_call(void) scheduler_ipi(); if (test_bit(ec_call_function_single, &bits)) generic_smp_call_function_single_interrupt(); + if (test_bit(ec_mcck_pending, &bits)) + s390_handle_mcck(); } static void do_ext_call_interrupt(struct ext_code ext_code, diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd7bd3581a0f..bfdcb7633957 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 435 common clone3 sys_clone3 sys_clone3 437 common openat2 sys_openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 5f70cefc13e4..332b542548cd 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -594,7 +594,7 @@ static int __init topology_setup(char *str) early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int enabled = topology_is_enabled(); int new_mode; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 4c0677fc8904..66e89b2866d7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -205,7 +205,7 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) again: rc = -EFAULT; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); uaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(uaddr)) @@ -234,7 +234,7 @@ again: pte_unmap_unlock(ptep, ptelock); unlock_page(page); out: - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); if (rc == -EAGAIN) { wait_on_page_writeback(page); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index bcc9bdb39ba2..c4baefaa6e34 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -21,7 +21,6 @@ #include <linux/memblock.h> #include <linux/compat.h> #include <asm/asm-offsets.h> -#include 
<asm/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -208,7 +207,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * it at vdso_base which is the "natural" base for it, but we might * fail and end up putting it elsewhere. */ - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); if (IS_ERR_VALUE(vdso_base)) { @@ -239,7 +238,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) rc = 0; out_up: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return rc; } diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index def3b60f1fe8..67a8e770e369 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -7,7 +7,7 @@ source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION def_bool y prompt "KVM" - ---help--- + help Say Y here to get to see options for using your Linux host to run other operating systems inside virtual machines (guests). This option alone does not add any kernel code. @@ -33,7 +33,7 @@ config KVM select HAVE_KVM_NO_POLL select SRCU select KVM_VFIO - ---help--- + help Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work on any 64bit machine. @@ -49,7 +49,7 @@ config KVM config KVM_S390_UCONTROL bool "Userspace controlled virtual machines" depends on KVM - ---help--- + help Allow CAP_SYS_ADMIN users to create KVM virtual machines that are controlled by userspace. diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 47a67a958107..6d6b57059493 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -9,8 +9,8 @@ #include <linux/vmalloc.h> #include <linux/mm_types.h> #include <linux/err.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" @@ -1173,7 +1173,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; - down_read(&sg->mm->mmap_sem); + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent * tables/pointers we read stay valid - unshadowing is however @@ -1202,6 +1202,6 @@ shadow_page: if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); ipte_unlock(vcpu); - up_read(&sg->mm->mmap_sem); + mmap_read_unlock(sg->mm); return rc; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bfb481134994..1608fd99bbee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2767,10 +2767,10 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr) { struct page *page = NULL; - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, &page, NULL, NULL); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); return page; } @@ -3082,7 +3082,7 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) __airqs_kick_single_vcpu(kvm, pending_mask); hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); return HRTIMER_RESTART; - }; + } return HRTIMER_NORESTART; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d05bb040fd42..d47c19718615 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,11 +31,11 @@ #include <linux/bitmap.h> #include <linux/sched/signal.h> #include <linux/string.h> +#include <linux/pgtable.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> 
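/*
 * Editor's sketch, not part of the patch: the include consolidation this
 * series applies across arch/s390. <linux/pgtable.h> became the single
 * entry point for page-table definitions and pulls in <asm/pgtable.h>
 * itself, which is why the hunks above delete the direct asm include and,
 * where needed, add the linux/ one. The tiny helper below is hypothetical
 * and only illustrates that pte helpers still resolve after the switch.
 */
#include <linux/pgtable.h>	/* replaces a direct #include <asm/pgtable.h> */

static inline int sketch_pte_is_present(pte_t pte)
{
	return pte_present(pte);	/* reached via linux/pgtable.h */
}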
#include <asm/stp.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/nmi.h> #include <asm/switch_to.h> @@ -57,110 +57,109 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "userspace_handled", VCPU_STAT(exit_userspace) }, - { "exit_null", VCPU_STAT(exit_null) }, - { "exit_validity", VCPU_STAT(exit_validity) }, - { "exit_stop_request", VCPU_STAT(exit_stop_request) }, - { "exit_external_request", VCPU_STAT(exit_external_request) }, - { "exit_io_request", VCPU_STAT(exit_io_request) }, - { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, - { "exit_instruction", VCPU_STAT(exit_instruction) }, - { "exit_pei", VCPU_STAT(exit_pei) }, - { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, - { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, - { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, - { "instruction_lctl", VCPU_STAT(instruction_lctl) }, - { "instruction_stctl", VCPU_STAT(instruction_stctl) }, - { "instruction_stctg", VCPU_STAT(instruction_stctg) }, - { "deliver_ckc", VCPU_STAT(deliver_ckc) }, - { "deliver_cputm", VCPU_STAT(deliver_cputm) }, - { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, - { "deliver_external_call", VCPU_STAT(deliver_external_call) }, - { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, - { "deliver_virtio", VCPU_STAT(deliver_virtio) }, - { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, - { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, - { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, - { "deliver_program", VCPU_STAT(deliver_program) }, - { "deliver_io", VCPU_STAT(deliver_io) }, - { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, - { "exit_wait_state", VCPU_STAT(exit_wait_state) }, - { "inject_ckc", VCPU_STAT(inject_ckc) }, - { "inject_cputm", VCPU_STAT(inject_cputm) }, - { "inject_external_call", VCPU_STAT(inject_external_call) }, - { "inject_float_mchk", VM_STAT(inject_float_mchk) }, - { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, - { "inject_io", VM_STAT(inject_io) }, - { "inject_mchk", VCPU_STAT(inject_mchk) }, - { "inject_pfault_done", VM_STAT(inject_pfault_done) }, - { "inject_program", VCPU_STAT(inject_program) }, - { "inject_restart", VCPU_STAT(inject_restart) }, - { "inject_service_signal", VM_STAT(inject_service_signal) }, - { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, - { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, - { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, - { "inject_virtio", VM_STAT(inject_virtio) }, - { "instruction_epsw", VCPU_STAT(instruction_epsw) }, - { "instruction_gs", VCPU_STAT(instruction_gs) }, - { "instruction_io_other", VCPU_STAT(instruction_io_other) }, - { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, - { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, - { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, - { "instruction_ptff", 
VCPU_STAT(instruction_ptff) }, - { "instruction_stidp", VCPU_STAT(instruction_stidp) }, - { "instruction_sck", VCPU_STAT(instruction_sck) }, - { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, - { "instruction_spx", VCPU_STAT(instruction_spx) }, - { "instruction_stpx", VCPU_STAT(instruction_stpx) }, - { "instruction_stap", VCPU_STAT(instruction_stap) }, - { "instruction_iske", VCPU_STAT(instruction_iske) }, - { "instruction_ri", VCPU_STAT(instruction_ri) }, - { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, - { "instruction_sske", VCPU_STAT(instruction_sske) }, - { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, - { "instruction_essa", VCPU_STAT(instruction_essa) }, - { "instruction_stsi", VCPU_STAT(instruction_stsi) }, - { "instruction_stfl", VCPU_STAT(instruction_stfl) }, - { "instruction_tb", VCPU_STAT(instruction_tb) }, - { "instruction_tpi", VCPU_STAT(instruction_tpi) }, - { "instruction_tprot", VCPU_STAT(instruction_tprot) }, - { "instruction_tsch", VCPU_STAT(instruction_tsch) }, - { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, - { "instruction_sie", VCPU_STAT(instruction_sie) }, - { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, - { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, - { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, - { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, - { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, - { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, - { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, - { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, - { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, - { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, - { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, - { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, - { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, - { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, - { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, - { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, - { "instruction_diag_10", VCPU_STAT(diagnose_10) }, - { "instruction_diag_44", VCPU_STAT(diagnose_44) }, - { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, - { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, - { "instruction_diag_258", VCPU_STAT(diagnose_258) }, - { "instruction_diag_308", VCPU_STAT(diagnose_308) }, - { "instruction_diag_500", VCPU_STAT(diagnose_500) }, - { "instruction_diag_other", VCPU_STAT(diagnose_other) }, + VCPU_STAT("userspace_handled", exit_userspace), + VCPU_STAT("exit_null", exit_null), + VCPU_STAT("exit_validity", exit_validity), + VCPU_STAT("exit_stop_request", exit_stop_request), + VCPU_STAT("exit_external_request", exit_external_request), + VCPU_STAT("exit_io_request", exit_io_request), + VCPU_STAT("exit_external_interrupt", exit_external_interrupt), + VCPU_STAT("exit_instruction", exit_instruction), + VCPU_STAT("exit_pei", exit_pei), + VCPU_STAT("exit_program_interruption", exit_program_interruption), + VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program), + VCPU_STAT("exit_operation_exception", exit_operation_exception), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + 
VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("instruction_lctlg", instruction_lctlg), + VCPU_STAT("instruction_lctl", instruction_lctl), + VCPU_STAT("instruction_stctl", instruction_stctl), + VCPU_STAT("instruction_stctg", instruction_stctg), + VCPU_STAT("deliver_ckc", deliver_ckc), + VCPU_STAT("deliver_cputm", deliver_cputm), + VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal), + VCPU_STAT("deliver_external_call", deliver_external_call), + VCPU_STAT("deliver_service_signal", deliver_service_signal), + VCPU_STAT("deliver_virtio", deliver_virtio), + VCPU_STAT("deliver_stop_signal", deliver_stop_signal), + VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal), + VCPU_STAT("deliver_restart_signal", deliver_restart_signal), + VCPU_STAT("deliver_program", deliver_program), + VCPU_STAT("deliver_io", deliver_io), + VCPU_STAT("deliver_machine_check", deliver_machine_check), + VCPU_STAT("exit_wait_state", exit_wait_state), + VCPU_STAT("inject_ckc", inject_ckc), + VCPU_STAT("inject_cputm", inject_cputm), + VCPU_STAT("inject_external_call", inject_external_call), + VM_STAT("inject_float_mchk", inject_float_mchk), + VCPU_STAT("inject_emergency_signal", inject_emergency_signal), + VM_STAT("inject_io", inject_io), + VCPU_STAT("inject_mchk", inject_mchk), + VM_STAT("inject_pfault_done", inject_pfault_done), + VCPU_STAT("inject_program", inject_program), + VCPU_STAT("inject_restart", inject_restart), + VM_STAT("inject_service_signal", inject_service_signal), + VCPU_STAT("inject_set_prefix", inject_set_prefix), + VCPU_STAT("inject_stop_signal", inject_stop_signal), + VCPU_STAT("inject_pfault_init", inject_pfault_init), + VM_STAT("inject_virtio", inject_virtio), + VCPU_STAT("instruction_epsw", instruction_epsw), + VCPU_STAT("instruction_gs", instruction_gs), + VCPU_STAT("instruction_io_other", instruction_io_other), + VCPU_STAT("instruction_lpsw", instruction_lpsw), + VCPU_STAT("instruction_lpswe", instruction_lpswe), + VCPU_STAT("instruction_pfmf", instruction_pfmf), + VCPU_STAT("instruction_ptff", instruction_ptff), + VCPU_STAT("instruction_stidp", instruction_stidp), + VCPU_STAT("instruction_sck", instruction_sck), + VCPU_STAT("instruction_sckpf", instruction_sckpf), + VCPU_STAT("instruction_spx", instruction_spx), + VCPU_STAT("instruction_stpx", instruction_stpx), + VCPU_STAT("instruction_stap", instruction_stap), + VCPU_STAT("instruction_iske", instruction_iske), + VCPU_STAT("instruction_ri", instruction_ri), + VCPU_STAT("instruction_rrbe", instruction_rrbe), + VCPU_STAT("instruction_sske", instruction_sske), + VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock), + VCPU_STAT("instruction_essa", instruction_essa), + VCPU_STAT("instruction_stsi", instruction_stsi), + VCPU_STAT("instruction_stfl", instruction_stfl), + VCPU_STAT("instruction_tb", instruction_tb), + VCPU_STAT("instruction_tpi", instruction_tpi), + VCPU_STAT("instruction_tprot", instruction_tprot), + VCPU_STAT("instruction_tsch", instruction_tsch), + VCPU_STAT("instruction_sthyi", instruction_sthyi), + VCPU_STAT("instruction_sie", instruction_sie), + VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense), + VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running), + 
VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call), + VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency), + VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency), + VCPU_STAT("instruction_sigp_start", instruction_sigp_start), + VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop), + VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status), + VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status), + VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status), + VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch), + VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix), + VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart), + VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset), + VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset), + VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown), + VCPU_STAT("instruction_diag_10", diagnose_10), + VCPU_STAT("instruction_diag_44", diagnose_44), + VCPU_STAT("instruction_diag_9c", diagnose_9c), + VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored), + VCPU_STAT("instruction_diag_258", diagnose_258), + VCPU_STAT("instruction_diag_308", diagnose_308), + VCPU_STAT("instruction_diag_500", diagnose_500), + VCPU_STAT("instruction_diag_other", diagnose_other), { NULL } }; @@ -764,9 +763,9 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = -EINVAL; else { r = 0; - down_write(&kvm->mm->mmap_sem); + mmap_write_lock(kvm->mm); kvm->mm->context.allow_gmap_hpage_1m = 1; - up_write(&kvm->mm->mmap_sem); + mmap_write_unlock(kvm->mm); /* * We might have to create fake 4k page * tables. 
To avoid that the hardware works on @@ -1816,7 +1815,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (!keys) return -ENOMEM; - down_read(&current->mm->mmap_sem); + mmap_read_lock(current->mm); srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); @@ -1830,7 +1829,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) break; } srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); if (!r) { r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, @@ -1874,7 +1873,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) goto out; i = 0; - down_read(&current->mm->mmap_sem); + mmap_read_lock(current->mm); srcu_idx = srcu_read_lock(&kvm->srcu); while (i < args->count) { unlocked = false; @@ -1901,7 +1900,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) i++; } srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); out: kvfree(keys); return r; @@ -2090,14 +2089,14 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, if (!values) return -ENOMEM; - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); srcu_idx = srcu_read_lock(&kvm->srcu); if (peek) ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); else ret = kvm_s390_get_cmma(kvm, args, values, bufsize); srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); if (kvm->arch.migration_mode) args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); @@ -2147,7 +2146,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, goto out; } - down_read(&kvm->mm->mmap_sem); + mmap_read_lock(kvm->mm); srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); @@ -2162,12 +2161,12 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, set_pgste_bits(kvm->mm, hva, mask, pgstev); } srcu_read_unlock(&kvm->srcu, srcu_idx); - up_read(&kvm->mm->mmap_sem); + mmap_read_unlock(kvm->mm); if (!kvm->mm->context.uses_cmm) { - down_write(&kvm->mm->mmap_sem); + mmap_write_lock(kvm->mm); kvm->mm->context.uses_cmm = 1; - up_write(&kvm->mm->mmap_sem); + mmap_write_unlock(kvm->mm); } out: vfree(bits); @@ -2240,9 +2239,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) if (r) break; - down_write(&current->mm->mmap_sem); + mmap_write_lock(current->mm); r = gmap_mark_unmergeable(); - up_write(&current->mm->mmap_sem); + mmap_write_unlock(current->mm); if (r) break; @@ -3924,11 +3923,13 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, } } -void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, +bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token); __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token); + + return true; } void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, @@ -3944,7 +3945,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, /* s390 will always inject the page directly */ } -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) { /* * s390 will always inject the page directly, @@ -3999,9 +4000,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) if (need_resched()) schedule(); - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); - if
(!kvm_is_ucontrol(vcpu->kvm)) { rc = kvm_s390_deliver_pending_interrupts(vcpu); if (rc) @@ -4337,8 +4335,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) store_regs_fmt2(vcpu, kvm_run); } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; int rc; if (kvm_run->immediate_exit) diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 893893642415..96ae368aa0a2 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -13,6 +13,7 @@ #include <linux/errno.h> #include <linux/compat.h> #include <linux/mm_types.h> +#include <linux/pgtable.h> #include <asm/asm-offsets.h> #include <asm/facility.h> @@ -20,7 +21,6 @@ #include <asm/debug.h> #include <asm/ebcdic.h> #include <asm/sysinfo.h> -#include <asm/pgtable.h> #include <asm/page-states.h> #include <asm/pgalloc.h> #include <asm/gmap.h> @@ -270,18 +270,18 @@ static int handle_iske(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); retry: unlocked = false; - down_read(&current->mm->mmap_sem); + mmap_read_lock(current->mm); rc = get_guest_storage_key(current->mm, vmaddr, &key); if (rc) { rc = fixup_user_fault(current, current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); goto retry; } } - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc < 0) @@ -317,17 +317,17 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); retry: unlocked = false; - down_read(&current->mm->mmap_sem); + mmap_read_lock(current->mm); rc = reset_guest_reference_bit(current->mm, vmaddr); if (rc < 0) { rc = fixup_user_fault(current, current->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); if (!rc) { - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); goto retry; } } - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc < 0) @@ -385,7 +385,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - down_read(&current->mm->mmap_sem); + mmap_read_lock(current->mm); rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC); @@ -395,7 +395,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc < 0) @@ -1091,7 +1091,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; - down_read(&current->mm->mmap_sem); + mmap_read_lock(current->mm); rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); if (rc < 0) { @@ -1099,7 +1099,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) FAULT_FLAG_WRITE, &unlocked); rc = !rc ?
-EAGAIN : rc; } - up_read(&current->mm->mmap_sem); + mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (rc == -EAGAIN) @@ -1122,7 +1122,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } /* - * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu) + * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu) */ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc) { @@ -1220,9 +1220,9 @@ static int handle_essa(struct kvm_vcpu *vcpu) * already correct, we do nothing and avoid the lock. */ if (vcpu->kvm->mm->context.uses_cmm == 0) { - down_write(&vcpu->kvm->mm->mmap_sem); + mmap_write_lock(vcpu->kvm->mm); vcpu->kvm->mm->context.uses_cmm = 1; - up_write(&vcpu->kvm->mm->mmap_sem); + mmap_write_unlock(vcpu->kvm->mm); } /* * If we are here, we are supposed to have CMMA enabled in @@ -1239,11 +1239,11 @@ static int handle_essa(struct kvm_vcpu *vcpu) } else { int srcu_idx; - down_read(&vcpu->kvm->mm->mmap_sem); + mmap_read_lock(vcpu->kvm->mm); srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); i = __do_essa(vcpu, orc); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - up_read(&vcpu->kvm->mm->mmap_sem); + mmap_read_unlock(vcpu->kvm->mm); if (i < 0) return i; /* Account for the possible extra cbrl entry */ @@ -1251,10 +1251,10 @@ static int handle_essa(struct kvm_vcpu *vcpu) } vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); for (i = 0; i < entries; ++i) __gmap_zap(gmap, cbrlo[i]); - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return 0; } diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f6c22d72072..9e9056cebfcf 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1000,11 +1000,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) handle_last_fault(vcpu, vsie_page); - if (need_resched()) - schedule(); - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); /* save current guest state of bp isolation override */ @@ -1185,6 +1180,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) break; + cond_resched(); } if (rc == -EFAULT) { diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index d4aa10795605..daca7bad66de 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr0_new, psw_mask; + unsigned long cr0, cr0_new, psw_mask, flags; struct s390_idle_data idle; u64 end; @@ -45,7 +45,9 @@ static void __udelay_disabled(unsigned long long usecs) psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; set_clock_comparator(end); set_cpu_flag(CIF_IGNORE_IRQ); + local_irq_save(flags); psw_idle(&idle, psw_mask); + local_irq_restore(flags); clear_cpu_flag(CIF_IGNORE_IRQ); set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index ae989b740376..36bce727897b 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -245,7 +245,7 @@ static int cmm_skip_blanks(char *cp, char **endp) } static int cmm_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { long nr =
cmm_get_pages(); struct ctl_table ctl_entry = { @@ -264,7 +264,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_timed_pages(); @@ -284,7 +284,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timeout_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; long nr, seconds; @@ -297,8 +297,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, if (write) { len = min(*lenp, sizeof(buf)); - if (copy_from_user(buf, buffer, len)) - return -EFAULT; + memcpy(buf, buffer, len); buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); @@ -311,8 +310,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, cmm_timeout_pages, cmm_timeout_seconds); if (len > *lenp) len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; + memcpy(buffer, buf, len); *lenp = len; *ppos += len; } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 5d67b81c704a..c2ac9b8ae612 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -6,7 +6,6 @@ #include <linux/kasan.h> #include <asm/kasan.h> #include <asm/sections.h> -#include <asm/pgtable.h> static unsigned long max_addr; diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index fd0dae9d10f4..9e0aa7aa03ba 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -20,9 +20,9 @@ #include <linux/ctype.h> #include <linux/ioport.h> #include <linux/refcount.h> +#include <linux/pgtable.h> #include <asm/diag.h> #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/ebcdic.h> #include <asm/errno.h> #include <asm/extmem.h> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index dedc28be27ab..6a24751557f0 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -33,7 +33,6 @@ #include <linux/hugetlb.h> #include <asm/asm-offsets.h> #include <asm/diag.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include <asm/irq.h> #include <asm/mmu_context.h> @@ -434,7 +433,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) flags |= FAULT_FLAG_USER; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); gmap = NULL; if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { @@ -508,14 +507,14 @@ retry: if (IS_ENABLED(CONFIG_PGSTE) && gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { /* FAULT_FLAG_RETRY_NOWAIT has been set, - * mmap_sem has not been released */ + * mmap_lock has not been released */ current->thread.gmap_pfault = 1; fault = VM_FAULT_PFAULT; goto out_up; } flags &= ~FAULT_FLAG_RETRY_NOWAIT; flags |= FAULT_FLAG_TRIED; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); goto retry; } } @@ -533,7 +532,7 @@ retry: } fault = 0; out_up: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out: return fault; } @@ -825,22 +824,22 @@ void do_secure_storage_access(struct pt_regs *regs) switch (get_fault_type(regs)) { case USER_FAULT: mm = current->mm; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); vma = find_vma(mm, addr); if (!vma) { - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); break; } page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); if 
(IS_ERR_OR_NULL(page)) { - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); break; } if (arch_make_page_accessible(page)) send_sig(SIGSEGV, current, 0); put_page(page); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); break; case KERNEL_FAULT: page = phys_to_page(addr); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1a95d8809cc3..190357ff86b3 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -17,8 +17,8 @@ #include <linux/swapops.h> #include <linux/ksm.h> #include <linux/mman.h> +#include <linux/pgtable.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/gmap.h> #include <asm/tlb.h> @@ -300,7 +300,7 @@ struct gmap *gmap_get_enabled(void) EXPORT_SYMBOL_GPL(gmap_get_enabled); /* - * gmap_alloc_table is assumed to be called with mmap_sem held + * gmap_alloc_table is assumed to be called with mmap_lock held */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init, unsigned long gaddr) @@ -405,10 +405,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) return -EINVAL; flush = 0; - down_write(&gmap->mm->mmap_sem); + mmap_write_lock(gmap->mm); for (off = 0; off < len; off += PMD_SIZE) flush |= __gmap_unmap_by_gaddr(gmap, to + off); - up_write(&gmap->mm->mmap_sem); + mmap_write_unlock(gmap->mm); if (flush) gmap_flush_tlb(gmap); return 0; @@ -438,7 +438,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, return -EINVAL; flush = 0; - down_write(&gmap->mm->mmap_sem); + mmap_write_lock(gmap->mm); for (off = 0; off < len; off += PMD_SIZE) { /* Remove old translation */ flush |= __gmap_unmap_by_gaddr(gmap, to + off); @@ -448,7 +448,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, (void *) from + off)) break; } - up_write(&gmap->mm->mmap_sem); + mmap_write_unlock(gmap->mm); if (flush) gmap_flush_tlb(gmap); if (off >= len) @@ -466,7 +466,7 @@ EXPORT_SYMBOL_GPL(gmap_map_segment); * Returns user space address which corresponds to the guest address or * -EFAULT if no such mapping exists. * This function does not establish potentially missing page table entries. - * The mmap_sem of the mm that belongs to the address space must be held + * The mmap_lock of the mm that belongs to the address space must be held * when this function gets called. * * Note: Can also be called for shadow gmaps. @@ -495,9 +495,9 @@ unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) { unsigned long rc; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); rc = __gmap_translate(gmap, gaddr); - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return rc; } EXPORT_SYMBOL_GPL(gmap_translate); @@ -534,7 +534,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, * * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT * if the vm address is already mapped to a different guest segment. - * The mmap_sem of the mm that belongs to the address space must be held + * The mmap_lock of the mm that belongs to the address space must be held * when this function gets called. 
*/ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) @@ -640,7 +640,7 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr, int rc; bool unlocked; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); retry: unlocked = false; @@ -655,7 +655,7 @@ retry: goto out_up; } /* - * In the case that fixup_user_fault unlocked the mmap_sem during + * In the case that fixup_user_fault unlocked the mmap_lock during * faultin redo __gmap_translate to not race with a map/unmap_segment. */ if (unlocked) @@ -663,13 +663,13 @@ retry: rc = __gmap_link(gmap, gaddr, vmaddr); out_up: - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return rc; } EXPORT_SYMBOL_GPL(gmap_fault); /* - * this function is assumed to be called with mmap_sem held + * this function is assumed to be called with mmap_lock held */ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { @@ -696,7 +696,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) unsigned long gaddr, vmaddr, size; struct vm_area_struct *vma; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); for (gaddr = from; gaddr < to; gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { /* Find the vm address for the guest address */ @@ -719,7 +719,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); } EXPORT_SYMBOL_GPL(gmap_discard); @@ -788,19 +788,19 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; - unsigned long *table; + unsigned long *table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) - return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; + if (WARN_ON_ONCE(level > (asce_type >> 2) + 1)) + return NULL; + if (asce_type != _ASCE_TYPE_REGION1 && gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; - table = gmap->table; - switch (gmap->asce & _ASCE_TYPE_MASK) { + switch (asce_type) { case _ASCE_TYPE_REGION1: table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if (level == 4) @@ -882,7 +882,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked)) return -EFAULT; if (unlocked) - /* lost mmap_sem, caller has to retry __gmap_translate */ + /* lost mmap_lock, caller has to retry __gmap_translate */ return 0; /* Connect the page tables */ return __gmap_link(gmap, gaddr, vmaddr); @@ -953,7 +953,7 @@ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) * -EAGAIN if a fixup is needed * -EINVAL if unsupported notifier bits have been specified * - * Expected to be called with sg->mm->mmap_sem in read and + * Expected to be called with sg->mm->mmap_lock in read and * guest_table_lock held. */ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, @@ -999,7 +999,7 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, * Returns 0 if successfully protected, -ENOMEM if out of memory and * -EAGAIN if a fixup is needed. 
* - * Expected to be called with sg->mm->mmap_sem in read + * Expected to be called with sg->mm->mmap_lock in read */ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, pmd_t *pmdp, int prot, unsigned long bits) @@ -1035,7 +1035,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * Returns 0 if successfully protected, -ENOMEM if out of memory and * -EFAULT if gaddr is invalid (or mapping for shadows is missing). * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) @@ -1106,9 +1106,9 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, return -EINVAL; if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; - down_read(&gmap->mm->mmap_sem); + mmap_read_lock(gmap->mm); rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - up_read(&gmap->mm->mmap_sem); + mmap_read_unlock(gmap->mm); return rc; } EXPORT_SYMBOL_GPL(gmap_mprotect_notify); @@ -1124,7 +1124,7 @@ EXPORT_SYMBOL_GPL(gmap_mprotect_notify); * if reading using the virtual address failed. -EINVAL if called on a gmap * shadow. * - * Called with gmap->mm->mmap_sem in read. + * Called with gmap->mm->mmap_lock in read. */ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) { @@ -1696,11 +1696,11 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, } spin_unlock(&parent->shadow_lock); /* protect after insertion, so it will get properly invalidated */ - down_read(&parent->mm->mmap_sem); + mmap_read_lock(parent->mm); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, PROT_READ, GMAP_NOTIFY_SHADOW); - up_read(&parent->mm->mmap_sem); + mmap_read_unlock(parent->mm); spin_lock(&parent->shadow_lock); new->initialized = true; if (rc) { @@ -1729,7 +1729,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake) @@ -1813,7 +1813,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_r2t); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, int fake) @@ -1897,7 +1897,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_r3t); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake) @@ -1981,7 +1981,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_sgt); * Returns 0 if the shadow page table was found and -EAGAIN if the page * table was not found. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. 
*/ int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, @@ -2021,7 +2021,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); * shadow table structure is incomplete, -ENOMEM if out of memory, * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with gmap->mm->mmap_sem in read + * Called with gmap->mm->mmap_lock in read */ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake) @@ -2100,7 +2100,7 @@ EXPORT_SYMBOL_GPL(gmap_shadow_pgt); * shadow table structure is incomplete, -ENOMEM if out of memory and * -EFAULT if an address in the parent gmap could not be resolved. * - * Called with sg->mm->mmap_sem in read. + * Called with sg->mm->mmap_lock in read. */ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte) { @@ -2543,12 +2543,12 @@ int s390_enable_sie(void) /* Fail if the page tables are 2K */ if (!mm_alloc_pgste(mm)) return -EINVAL; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); mm->context.has_pgste = 1; /* split thp mappings and disable thp for future mappings */ thp_split_mm(mm); walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); @@ -2617,7 +2617,7 @@ int s390_enable_skey(void) struct mm_struct *mm = current->mm; int rc = 0; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); if (mm_uses_skeys(mm)) goto out_up; @@ -2630,7 +2630,7 @@ int s390_enable_skey(void) walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL); out_up: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return rc; } EXPORT_SYMBOL_GPL(s390_enable_skey); @@ -2651,9 +2651,9 @@ static const struct mm_walk_ops reset_cmma_walk_ops = { void s390_reset_cmma(struct mm_struct *mm) { - down_write(&mm->mmap_sem); + mmap_write_lock(mm); walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); } EXPORT_SYMBOL_GPL(s390_reset_cmma); @@ -2685,9 +2685,9 @@ void s390_reset_acc(struct mm_struct *mm) */ if (!mmget_not_zero(mm)) return; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); } EXPORT_SYMBOL_GPL(s390_reset_acc); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 4632d4e26b66..82df06d720e8 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -254,25 +254,15 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); } -static __init int setup_hugepagesz(char *opt) +bool __init arch_hugetlb_valid_size(unsigned long size) { - unsigned long size; - char *string = opt; - - size = memparse(opt, &opt); - if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) { - hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); - } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else { - hugetlb_bad_size(); - pr_err("hugepagesz= specifies an unsupported page size %s\n", - string); - return 0; - } - return 1; + if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) + return true; + else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) + return true; + else + return false; } -__setup("hugepagesz=", setup_hugepagesz); static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index
87b2d024e75a..6dc7c3b60ef6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -33,7 +33,6 @@ #include <linux/dma-direct.h> #include <asm/processor.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/dma.h> #include <asm/lowcore.h> @@ -122,7 +121,7 @@ void __init paging_init(void) memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; - free_area_init_nodes(max_zone_pfns); + free_area_init(max_zone_pfns); } void mark_rodata_ro(void) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 06345616a646..99dd1c63a065 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -2,8 +2,8 @@ #include <linux/kasan.h> #include <linux/sched/task.h> #include <linux/memblock.h> +#include <linux/pgtable.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/kasan.h> #include <asm/mem_detect.h> #include <asm/processor.h> diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index de7ca4b6718f..22a0be655f27 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -55,19 +55,22 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -void notrace s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *s390_kernel_write(void *dst, const void *src, size_t size) { + void *tmp = dst; unsigned long flags; long copied; spin_lock_irqsave(&s390_kernel_write_lock, flags); while (size) { - copied = s390_kernel_write_odd(dst, src, size); - dst += copied; + copied = s390_kernel_write_odd(tmp, src, size); + tmp += copied; src += copied; size -= copied; } spin_unlock_irqrestore(&s390_kernel_write_lock, flags); + + return dst; } static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index e22c06d5f206..c5c52ec2b46f 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -7,7 +7,6 @@ #include <linux/mm.h> #include <asm/cacheflush.h> #include <asm/facility.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/page.h> #include <asm/set_memory.h> @@ -86,7 +85,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, { pte_t *ptep, new; - ptep = pte_offset(pmdp, addr); + ptep = pte_offset_kernel(pmdp, addr); do { new = *ptep; if (pte_none(new)) @@ -338,19 +337,11 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; int nr, i, j; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pte_t *pte; for (i = 0; i < numpages;) { address = page_to_phys(page + i); - pgd = pgd_offset_k(address); - p4d = p4d_offset(pgd, address); - pud = pud_offset(p4d, address); - pmd = pmd_offset(pud, address); - pte = pte_offset_kernel(pmd, address); + pte = virt_to_kpte(address); nr = (unsigned long)pte >> ilog2(sizeof(long)); nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); nr = min(numpages - i, nr); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index fff169d64711..11d2c8395e2a 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -114,7 +114,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) spin_lock_bh(&mm->page_table_lock); /* - * This routine gets called with mmap_sem lock held and there is + * This routine gets called with mmap_lock held and there is
However, if * that would ever change, the below check will let us know. */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9ebd01219812..2e0cc19f4cd7 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -19,7 +19,6 @@ #include <linux/ksm.h> #include <linux/mman.h> -#include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/tlbflush.h> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index f810930aff42..8b6282cf7d13 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -13,7 +13,6 @@ #include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/pgalloc.h> -#include <asm/pgtable.h> #include <asm/setup.h> #include <asm/tlbflush.h> #include <asm/sections.h> diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8d2134136290..f4242b894cf2 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -503,7 +503,8 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) } else { /* j tail_call_start: NOP if no tail calls are used */ EMIT4_PCREL(0xa7f40000, 6); - _EMIT2(0); + /* bcr 0,%0 */ + EMIT2(0x0700, 0, REG_0); } /* Tail calls have to skip above initialization */ jit->tail_call_start = jit->prg; @@ -594,7 +595,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) * stack space for the large switch statement. */ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, - int i, bool extra_pass) + int i, bool extra_pass, u32 stack_depth) { struct bpf_insn *insn = &fp->insnsi[i]; u32 dst_reg = insn->dst_reg; @@ -1207,7 +1208,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, */ if (jit->seen & SEEN_STACK) - off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth; + off = STK_OFF_TCCNT + STK_OFF + stack_depth; else off = STK_OFF_TCCNT; /* lhi %w0,1 */ @@ -1249,7 +1250,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, /* * Restore registers before calling function */ - save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth); + save_restore_regs(jit, REGS_RESTORE, stack_depth); /* * goto *(prog->bpf_func + tail_call_start); @@ -1519,7 +1520,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i) * Compile eBPF program into s390x code */ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, - bool extra_pass) + bool extra_pass, u32 stack_depth) { int i, insn_count, lit32_size, lit64_size; @@ -1527,18 +1528,18 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp, jit->lit64 = jit->lit64_start; jit->prg = 0; - bpf_jit_prologue(jit, fp->aux->stack_depth); + bpf_jit_prologue(jit, stack_depth); if (bpf_set_addr(jit, 0) < 0) return -1; for (i = 0; i < fp->len; i += insn_count) { - insn_count = bpf_jit_insn(jit, fp, i, extra_pass); + insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth); if (insn_count < 0) return -1; /* Next instruction address */ if (bpf_set_addr(jit, i + insn_count) < 0) return -1; } - bpf_jit_epilogue(jit, fp->aux->stack_depth); + bpf_jit_epilogue(jit, stack_depth); lit32_size = jit->lit32 - jit->lit32_start; lit64_size = jit->lit64 - jit->lit64_start; @@ -1569,6 +1570,7 @@ struct s390_jit_data { */ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { + u32 stack_depth = round_up(fp->aux->stack_depth, 8); struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; struct s390_jit_data *jit_data; @@ -1621,7 +1623,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * - 3: Calculate program size and 
addrs array */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp, extra_pass)) { + if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { fp = orig_fp; goto free_addrs; } @@ -1635,7 +1637,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } skip_init_ctx: - if (bpf_jit_prog(&jit, fp, extra_pass)) { + if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 748626a33028..b4e3c84772a1 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -4,4 +4,5 @@ # obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ - pci_event.o pci_debug.o pci_insn.o pci_mmio.o + pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ + pci_bus.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 94ca121933de..3902c9f6f2d6 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -36,18 +36,21 @@ #include <asm/pci_clp.h> #include <asm/pci_dma.h> +#include "pci_bus.h" + /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); -static unsigned int zpci_num_domains_allocated; #define ZPCI_IOMAP_ENTRIES \ min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \ ZPCI_IOMAP_MAX_ENTRIES) +unsigned int s390_pci_no_rid; + static DEFINE_SPINLOCK(zpci_iomap_lock); static unsigned long *zpci_iomap_bitmap; struct zpci_iomap_entry *zpci_iomap_start; @@ -88,17 +91,12 @@ void zpci_remove_reserved_devices(void) spin_unlock(&zpci_list_lock); list_for_each_entry_safe(zdev, tmp, &remove, entry) - zpci_remove_device(zdev); -} - -static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) -{ - return (bus && bus->sysdata) ?
(struct zpci_dev *) bus->sysdata : NULL; + zpci_zdev_put(zdev); } int pci_domain_nr(struct pci_bus *bus) { - return ((struct zpci_dev *) bus->sysdata)->domain; + return ((struct zpci_bus *) bus->sysdata)->domain_nr; } EXPORT_SYMBOL_GPL(pci_domain_nr); @@ -228,28 +226,29 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) zpci_memcpy_toio(to, from, count); } -void __iomem *ioremap(unsigned long ioaddr, unsigned long size) +void __iomem *ioremap(phys_addr_t addr, size_t size) { + unsigned long offset, vaddr; struct vm_struct *area; - unsigned long offset; + phys_addr_t last_addr; - if (!size) + last_addr = addr + size - 1; + if (!size || last_addr < addr) return NULL; if (!static_branch_unlikely(&have_mio)) - return (void __iomem *) ioaddr; + return (void __iomem *) addr; - offset = ioaddr & ~PAGE_MASK; - ioaddr &= PAGE_MASK; + offset = addr & ~PAGE_MASK; + addr &= PAGE_MASK; size = PAGE_ALIGN(size + offset); area = get_vm_area(size, VM_IOREMAP); if (!area) return NULL; - if (ioremap_page_range((unsigned long) area->addr, - (unsigned long) area->addr + size, - ioaddr, PAGE_KERNEL)) { - vunmap(area->addr); + vaddr = (unsigned long) area->addr; + if (ioremap_page_range(vaddr, vaddr + size, addr, PAGE_KERNEL)) { + free_vm_area(area); return NULL; } return (void __iomem *) ((unsigned long) area->addr + offset); @@ -373,29 +372,17 @@ EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - int ret; + struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); - if (!zdev || devfn != ZPCI_DEVFN) - ret = -ENODEV; - else - ret = zpci_cfg_load(zdev, where, val, size); - - return ret; + return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV; } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - int ret; + struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); - if (!zdev || devfn != ZPCI_DEVFN) - ret = -ENODEV; - else - ret = zpci_cfg_store(zdev, where, val, size); - - return ret; + return (zdev) ? 
zpci_cfg_store(zdev, where, val, size) : -ENODEV; } static struct pci_ops pci_root_ops = { @@ -506,15 +493,15 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, return r; } -static int zpci_setup_bus_resources(struct zpci_dev *zdev, - struct list_head *resources) +int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources) { unsigned long addr, size, flags; struct resource *res; int i, entry; snprintf(zdev->res_name, sizeof(zdev->res_name), - "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR); + "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR); for (i = 0; i < PCI_STD_NUM_BARS; i++) { if (!zdev->bars[i].size) @@ -608,98 +595,53 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_debug_exit_device(zdev); } -static int zpci_alloc_domain(struct zpci_dev *zdev) +static int __zpci_register_domain(int domain) { spin_lock(&zpci_domain_lock); - if (zpci_num_domains_allocated > (ZPCI_NR_DEVICES - 1)) { + if (test_bit(domain, zpci_domain)) { spin_unlock(&zpci_domain_lock); - pr_err("Adding PCI function %08x failed because the configured limit of %d is reached\n", - zdev->fid, ZPCI_NR_DEVICES); - return -ENOSPC; + pr_err("Domain %04x is already assigned\n", domain); + return -EEXIST; } + set_bit(domain, zpci_domain); + spin_unlock(&zpci_domain_lock); + return domain; +} - if (zpci_unique_uid) { - zdev->domain = (u16) zdev->uid; - if (zdev->domain == 0) { - pr_warn("UID checking is active but no UID is set for PCI function %08x, so automatic domain allocation is used instead\n", - zdev->fid); - update_uid_checking(false); - goto auto_allocate; - } +static int __zpci_alloc_domain(void) +{ + int domain; - if (test_bit(zdev->domain, zpci_domain)) { - spin_unlock(&zpci_domain_lock); - pr_err("Adding PCI function %08x failed because domain %04x is already assigned\n", - zdev->fid, zdev->domain); - return -EEXIST; - } - set_bit(zdev->domain, zpci_domain); - zpci_num_domains_allocated++; - spin_unlock(&zpci_domain_lock); - return 0; - } -auto_allocate: + spin_lock(&zpci_domain_lock); /* * We can always auto allocate domains below ZPCI_NR_DEVICES. * There is either a free domain or we have reached the maximum in * which case we would have bailed earlier. 
*/ - zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); - set_bit(zdev->domain, zpci_domain); - zpci_num_domains_allocated++; + domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); + set_bit(domain, zpci_domain); spin_unlock(&zpci_domain_lock); - return 0; + return domain; } -static void zpci_free_domain(struct zpci_dev *zdev) +int zpci_alloc_domain(int domain) { - spin_lock(&zpci_domain_lock); - clear_bit(zdev->domain, zpci_domain); - zpci_num_domains_allocated--; - spin_unlock(&zpci_domain_lock); + if (zpci_unique_uid) { + if (domain) + return __zpci_register_domain(domain); + pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n"); + update_uid_checking(false); + } + return __zpci_alloc_domain(); } -void pcibios_remove_bus(struct pci_bus *bus) +void zpci_free_domain(int domain) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - - zpci_exit_slot(zdev); - zpci_cleanup_bus_resources(zdev); - zpci_destroy_iommu(zdev); - zpci_free_domain(zdev); - - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - - zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + spin_lock(&zpci_domain_lock); + clear_bit(domain, zpci_domain); + spin_unlock(&zpci_domain_lock); } -static int zpci_scan_bus(struct zpci_dev *zdev) -{ - LIST_HEAD(resources); - int ret; - - ret = zpci_setup_bus_resources(zdev, &resources); - if (ret) - goto error; - - zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, - zdev, &resources); - if (!zdev->bus) { - ret = -EIO; - goto error; - } - zdev->bus->max_bus_speed = zdev->max_bus_speed; - pci_bus_add_devices(zdev->bus); - return 0; - -error: - zpci_cleanup_bus_resources(zdev); - pci_free_resource_list(&resources); - return ret; -} int zpci_enable_device(struct zpci_dev *zdev) { @@ -734,13 +676,15 @@ int zpci_create_device(struct zpci_dev *zdev) { int rc; - rc = zpci_alloc_domain(zdev); - if (rc) - goto out; + kref_init(&zdev->kref); + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); rc = zpci_init_iommu(zdev); if (rc) - goto out_free; + goto out; mutex_init(&zdev->lock); if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { @@ -748,36 +692,59 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out_destroy_iommu; } - rc = zpci_scan_bus(zdev); + + rc = zpci_bus_device_register(zdev, &pci_root_ops); if (rc) goto out_disable; - spin_lock(&zpci_list_lock); - list_add_tail(&zdev->entry, &zpci_list); - spin_unlock(&zpci_list_lock); - - zpci_init_slot(zdev); - return 0; out_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); + out_destroy_iommu: zpci_destroy_iommu(zdev); -out_free: - zpci_free_domain(zdev); out: + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); return rc; } -void zpci_remove_device(struct zpci_dev *zdev) +void zpci_release_device(struct kref *kref) { - if (!zdev->bus) - return; + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + + if (zdev->zbus->bus) { + struct pci_dev *pdev; + + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev) + pci_stop_and_remove_bus_device_locked(pdev); + } + + switch (zdev->state) { + case ZPCI_FN_STATE_ONLINE: + case ZPCI_FN_STATE_CONFIGURED: + zpci_disable_device(zdev); + fallthrough; + case ZPCI_FN_STATE_STANDBY: + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + zpci_cleanup_bus_resources(zdev); + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); + fallthrough; + 
default: + break; + } - pci_stop_root_bus(zdev->bus); - pci_remove_root_bus(zdev->bus); + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + kfree(zdev); } int zpci_report_error(struct pci_dev *pdev, @@ -844,6 +811,10 @@ char * __init pcibios_setup(char *str) s390_pci_force_floating = 1; return NULL; } + if (!strcmp(str, "norid")) { + s390_pci_no_rid = 1; + return NULL; + } return str; } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c new file mode 100644 index 000000000000..642a99384688 --- /dev/null +++ b/arch/s390/pci/pci_bus.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Pierre Morel <pmorel@linux.ibm.com> + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <linux/seq_file.h> +#include <linux/jump_label.h> +#include <linux/pci.h> +#include <linux/printk.h> + +#include <asm/pci_clp.h> +#include <asm/pci_dma.h> + +#include "pci_bus.h" + +static LIST_HEAD(zbus_list); +static DEFINE_SPINLOCK(zbus_list_lock); +static int zpci_nb_devices; + +/* zpci_bus_scan + * @zbus: the zbus holding the zdevices + * @ops: the pci operations + * + * The domain number must be set before pci_scan_root_bus is called. + * This function can be called once the domain is known, hence + * when the function_0 is discovered. + */ +static int zpci_bus_scan(struct zpci_bus *zbus, int domain, struct pci_ops *ops) +{ + struct pci_bus *bus; + int rc; + + rc = zpci_alloc_domain(domain); + if (rc < 0) + return rc; + zbus->domain_nr = rc; + + bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources); + if (!bus) { + zpci_free_domain(zbus->domain_nr); + return -EFAULT; + } + + zbus->bus = bus; + pci_bus_add_devices(bus); + return 0; +} + +static void zpci_bus_release(struct kref *kref) +{ + struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref); + + if (zbus->bus) { + pci_lock_rescan_remove(); + pci_stop_root_bus(zbus->bus); + + zpci_free_domain(zbus->domain_nr); + pci_free_resource_list(&zbus->resources); + + pci_remove_root_bus(zbus->bus); + pci_unlock_rescan_remove(); + } + + spin_lock(&zbus_list_lock); + list_del(&zbus->bus_next); + spin_unlock(&zbus_list_lock); + kfree(zbus); +} + +static void zpci_bus_put(struct zpci_bus *zbus) +{ + kref_put(&zbus->kref, zpci_bus_release); +} + +static struct zpci_bus *zpci_bus_get(int pchid) +{ + struct zpci_bus *zbus; + + spin_lock(&zbus_list_lock); + list_for_each_entry(zbus, &zbus_list, bus_next) { + if (pchid == zbus->pchid) { + kref_get(&zbus->kref); + goto out_unlock; + } + } + zbus = NULL; +out_unlock: + spin_unlock(&zbus_list_lock); + return zbus; +} + +static struct zpci_bus *zpci_bus_alloc(int pchid) +{ + struct zpci_bus *zbus; + + zbus = kzalloc(sizeof(*zbus), GFP_KERNEL); + if (!zbus) + return NULL; + + zbus->pchid = pchid; + INIT_LIST_HEAD(&zbus->bus_next); + spin_lock(&zbus_list_lock); + list_add_tail(&zbus->bus_next, &zbus_list); + spin_unlock(&zbus_list_lock); + + kref_init(&zbus->kref); + INIT_LIST_HEAD(&zbus->resources); + + zbus->bus_resource.start = 0; + zbus->bus_resource.end = ZPCI_BUS_NR; + zbus->bus_resource.flags = IORESOURCE_BUS; + pci_add_resource(&zbus->resources, &zbus->bus_resource); + + return zbus; +} + +#ifdef CONFIG_PCI_IOV +static int zpci_bus_link_virtfn(struct pci_dev *pdev, + struct pci_dev
*virtfn, int vfid) +{ + int rc; + + virtfn->physfn = pci_dev_get(pdev); + rc = pci_iov_sysfs_link(pdev, virtfn, vfid); + if (rc) { + pci_dev_put(pdev); + virtfn->physfn = NULL; + return rc; + } + return 0; +} + +static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, + struct pci_dev *virtfn, int vfn) +{ + int i, cand_devfn; + struct zpci_dev *zdev; + struct pci_dev *pdev; + int vfid = vfn - 1; /* Linux VF IDs start at 0, vfn starts at 1 */ + int rc = 0; + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + WARN_ON(vfid < 0); + /* If the parent PF for the given VF is also configured in the + * instance, it must be on the same zbus. + * We can then identify the parent PF by checking what + * devfn the VF would have if it belonged to that PF using the PF's + * stride and offset. Only if this candidate devfn matches the + * actual devfn will we link both functions. + */ + for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { + zdev = zbus->function[i]; + if (zdev && zdev->is_physfn) { + pdev = pci_get_slot(zbus->bus, zdev->devfn); + cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); + if (cand_devfn == virtfn->devfn) { + rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); + break; + } + } + } + return rc; +} +#else +static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, + struct pci_dev *virtfn, int vfn) +{ + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + return 0; +} +#endif + +static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_bus *bus; + struct resource_entry *window, *n; + struct resource *res; + struct pci_dev *pdev; + int rc; + + bus = zbus->bus; + if (!bus) + return -EINVAL; + + pdev = pci_get_slot(bus, zdev->devfn); + if (pdev) { + /* Device is already known. */ + pci_dev_put(pdev); + return 0; + } + + rc = zpci_init_slot(zdev); + if (rc) + return rc; + zdev->has_hp_slot = 1; + + resource_list_for_each_entry_safe(window, n, &zbus->resources) { + res = window->res; + pci_bus_add_resource(bus, res, 0); + } + + pdev = pci_scan_single_device(bus, zdev->devfn); + if (pdev) { + if (!zdev->is_physfn) { + rc = zpci_bus_setup_virtfn(zbus, pdev, zdev->vfn); + if (rc) + goto failed_with_pdev; + } + pci_bus_add_device(pdev); + } + return 0; + +failed_with_pdev: + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + return rc; +} + +static void zpci_bus_add_devices(struct zpci_bus *zbus) +{ + int i; + + for (i = 1; i < ZPCI_FUNCTIONS_PER_BUS; i++) + if (zbus->function[i]) + zpci_bus_add_device(zbus, zbus->function[i]); + + pci_lock_rescan_remove(); + pci_bus_add_devices(zbus->bus); + pci_unlock_rescan_remove(); +} + +int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) +{ + struct zpci_bus *zbus = NULL; + int rc = -EBADF; + + if (zpci_nb_devices == ZPCI_NR_DEVICES) { + pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n", + zdev->fid, ZPCI_NR_DEVICES); + return -ENOSPC; + } + zpci_nb_devices++; + + if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS) + return -EINVAL; + + if (!s390_pci_no_rid && zdev->rid_available) + zbus = zpci_bus_get(zdev->pchid); + + if (!zbus) { + zbus = zpci_bus_alloc(zdev->pchid); + if (!zbus) + return -ENOMEM; + } + + zdev->zbus = zbus; + if (zbus->function[zdev->devfn]) { + pr_err("devfn %04x is already assigned\n", zdev->devfn); + goto error; /* rc already set */ + } + zbus->function[zdev->devfn] = zdev; + + zpci_setup_bus_resources(zdev, &zbus->resources); + + if (zbus->bus) { + if (!zbus->multifunction) { + WARN_ONCE(1, "zbus is not multifunction\n"); + goto error_bus; + } + if
(!zdev->rid_available) { + WARN_ONCE(1, "rid_available not set for multifunction\n"); + goto error_bus; + } + rc = zpci_bus_add_device(zbus, zdev); + if (rc) + goto error_bus; + } else if (zdev->devfn == 0) { + if (zbus->multifunction && !zdev->rid_available) { + WARN_ONCE(1, "rid_available not set on function 0 for multifunction\n"); + goto error_bus; + } + rc = zpci_bus_scan(zbus, (u16)zdev->uid, ops); + if (rc) + goto error_bus; + zpci_bus_add_devices(zbus); + rc = zpci_init_slot(zdev); + if (rc) + goto error_bus; + zdev->has_hp_slot = 1; + zbus->multifunction = zdev->rid_available; + zbus->max_bus_speed = zdev->max_bus_speed; + } else { + zbus->multifunction = 1; + } + + return 0; + +error_bus: + zpci_nb_devices--; + zbus->function[zdev->devfn] = NULL; +error: + pr_err("Adding PCI function %08x failed\n", zdev->fid); + zpci_bus_put(zbus); + return rc; +} + +void zpci_bus_device_unregister(struct zpci_dev *zdev) +{ + struct zpci_bus *zbus = zdev->zbus; + + zpci_nb_devices--; + zbus->function[zdev->devfn] = NULL; + zpci_bus_put(zbus); +} diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h new file mode 100644 index 000000000000..89be3c354b7b --- /dev/null +++ b/arch/s390/pci/pci_bus.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Pierre Morel <pmorel@linux.ibm.com> + * + */ + +int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); +void zpci_bus_device_unregister(struct zpci_dev *zdev); +int zpci_bus_init(void); + +void zpci_release_device(struct kref *kref); +static inline void zpci_zdev_put(struct zpci_dev *zdev) +{ + kref_put(&zdev->kref, zpci_release_device); +} + +int zpci_alloc_domain(int domain); +void zpci_free_domain(int domain); +int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources); + +static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, + unsigned int devfn) +{ + struct zpci_bus *zbus = bus->sysdata; + + return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? 
NULL : zbus->function[devfn]; +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index ea794ae755ae..7e735f41a0a6 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -155,8 +155,13 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, zdev->pfgid = response->pfgid; zdev->pft = response->pft; zdev->vfn = response->vfn; + zdev->port = response->port; zdev->uid = response->uid; zdev->fmb_length = sizeof(u32) * response->fmb_len; + zdev->rid_available = response->rid_avail; + zdev->is_physfn = response->is_physfn; + if (!s390_pci_no_rid && zdev->rid_available) + zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN; memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip)); if (response->util_str_avail) { @@ -309,14 +314,13 @@ out: int clp_disable_fh(struct zpci_dev *zdev) { - u32 fh = zdev->fh; int rc; if (!zdev_enabled(zdev)) return 0; rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 8d6ee4af4230..08e1d619398e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -14,6 +14,8 @@ #include <asm/pci_debug.h> #include <asm/sclp.h> +#include "pci_bus.h" + /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { u32 reserved1; @@ -53,7 +55,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) zpci_err_hex(ccdf, sizeof(*ccdf)); if (zdev) - pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -78,36 +80,28 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) enum zpci_state state; int ret; - if (zdev) - pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + if (zdev && zdev->zbus && zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", - pdev ? 
pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { - ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0); - if (ret) - break; - zdev = get_zdev_by_fid(ccdf->fid); - } - if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) + ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); break; - zdev->state = ZPCI_FN_STATE_CONFIGURED; + } zdev->fh = ccdf->fh; - ret = zpci_enable_device(zdev); - if (ret) - break; - pci_lock_rescan_remove(); - pci_rescan_bus(zdev->bus); - pci_unlock_rescan_remove(); + zdev->state = ZPCI_FN_STATE_CONFIGURED; + zpci_create_device(zdev); break; case 0x0302: /* Reserved -> Standby */ - if (!zdev) + if (!zdev) { clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + break; + } + zdev->fh = ccdf->fh; break; case 0x0303: /* Deconfiguration requested */ if (!zdev) @@ -135,12 +129,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) pci_stop_and_remove_bus_device_locked(pdev); } - zdev->fh = ccdf->fh; - zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; if (!clp_get_state(ccdf->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { - zpci_remove_device(zdev); + zpci_zdev_put(zdev); } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ @@ -149,12 +141,11 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - zpci_remove_device(zdev); + zpci_zdev_put(zdev); break; default: break; } - pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 020a2c514d96..38efa3e852c4 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -125,7 +125,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, struct vm_area_struct *vma; long ret; - down_read(¤t->mm->mmap_sem); + mmap_read_lock(current->mm); ret = -EINVAL; vma = find_vma(current->mm, user_addr); if (!vma) @@ -135,7 +135,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, goto out; ret = follow_pfn(vma, user_addr, pfn); out: - up_read(¤t->mm->mmap_sem); + mmap_read_unlock(current->mm); return ret; } diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 215f17437a4f..5c028bee91b9 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -33,6 +33,7 @@ zpci_attr(pchid, "0x%04x\n", pchid); zpci_attr(pfgid, "0x%02x\n", pfgid); zpci_attr(vfn, "0x%04x\n", vfn); zpci_attr(pft, "0x%02x\n", pft); +zpci_attr(port, "%d\n", port); zpci_attr(uid, "0x%x\n", uid); zpci_attr(segment0, "0x%02x\n", pfip[0]); zpci_attr(segment1, "0x%02x\n", pfip[1]); @@ -88,7 +89,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ret = zpci_enable_device(zdev); if (ret) goto out; - pci_rescan_bus(zdev->bus); + pci_rescan_bus(zdev->zbus->bus); } out: pci_unlock_rescan_remove(); @@ -142,6 +143,7 @@ static struct attribute *zpci_dev_attrs[] = { &dev_attr_pchid.attr, &dev_attr_pfgid.attr, &dev_attr_pft.attr, + &dev_attr_port.attr, &dev_attr_vfn.attr, &dev_attr_uid.attr, &dev_attr_recover.attr, |