From 2fc4876ea8a9932e0d0bd84daf638186fcadd01f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 31 Aug 2017 13:18:22 +0200 Subject: s390/mm: use VM_BUG_ON in crst_table_[upgrade|downgrade] The BUG_ON in crst_table_[upgrade|downgrade] is a debugging aid, replace it with VM_BUG_ON. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index c5b74dd61197..05b5b1b0a8d9 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -83,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) int rc, notify; /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ - BUG_ON(mm->context.asce_limit < _REGION2_SIZE); + VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE); if (end >= TASK_SIZE_MAX) return -ENOMEM; rc = 0; @@ -124,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm) pgd_t *pgd; /* downgrade should only happen from 3 to 2 levels (compat only) */ - BUG_ON(mm->context.asce_limit != _REGION2_SIZE); + VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE); if (current->active_mm == mm) { clear_user_asce(); -- cgit v1.2.3 From e7fc5146cfe4f1b10f2ed6c36b65248aa948abe8 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 8 Nov 2016 07:09:13 +0100 Subject: s390/zcrypt: externalize test AP queue Under certain specified conditions, the Test AP Queue (TAPQ) subfunction of the Process Adjunct Processor Queue (PQAP) instruction will be intercepted by a guest VM. The guest VM must have a means for executing the intercepted instruction. The vfio_ap driver will provide an interface to execute the PQAP(TAPQ) instruction subfunction on behalf of a guest VM. The code for executing the AP instructions currently resides in the AP bus. This patch refactors the AP bus code to externalize access to the PQAP(TAPQ) instruction subfunction to make it available to the vfio_ap driver. Signed-off-by: Tony Krowiak Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ap.h | 64 ++++++++++++++++++++++++++++++++++++++++++ drivers/s390/crypto/ap_bus.c | 28 ++++++++++++++---- drivers/s390/crypto/ap_bus.h | 36 +----------------------- drivers/s390/crypto/ap_queue.c | 2 +- 4 files changed, 88 insertions(+), 42 deletions(-) create mode 100644 arch/s390/include/asm/ap.h (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h new file mode 100644 index 000000000000..0f3e6f38db55 --- /dev/null +++ b/arch/s390/include/asm/ap.h @@ -0,0 +1,64 @@ +/* + * Adjunct processor (AP) interfaces + * + * Copyright IBM Corp. 2017 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Tony Krowiak + * Martin Schwidefsky + * Harald Freudenberger + */ + +#ifndef _ASM_S390_AP_H_ +#define _ASM_S390_AP_H_ + +/** + * The ap_qid_t identifier of an ap queue. + * If the AP facilities test (APFT) facility is available, + * card and queue index are 8 bit values, otherwise + * card index is 6 bit and queue index a 4 bit value. + */ +typedef unsigned int ap_qid_t; + +#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255)) +#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63) +#define AP_QID_QUEUE(_qid) ((_qid) & 255) + +/** + * struct ap_queue_status - Holds the AP queue status. + * @queue_empty: Shows if queue is empty + * @replies_waiting: Waiting replies + * @queue_full: Is 1 if the queue is full + * @irq_enabled: Shows if interrupts are enabled for the AP + * @response_code: Holds the 8 bit response code + * + * The ap queue status word is returned by all three AP functions + * (PQAP, NQAP and DQAP). There's a set of flags in the first + * byte, followed by a 1 byte response code. + */ +struct ap_queue_status { + unsigned int queue_empty : 1; + unsigned int replies_waiting : 1; + unsigned int queue_full : 1; + unsigned int _pad1 : 4; + unsigned int irq_enabled : 1; + unsigned int response_code : 8; + unsigned int _pad2 : 16; +}; + +/** + * ap_test_queue(): Test adjunct processor queue. + * @qid: The AP queue number + * @tbit: Test facilities bit + * @info: Pointer to queue descriptor + * + * Returns AP queue status structure. + */ +struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info); + +#endif /* _ASM_S390_AP_H_ */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 6dee598979e7..4fcfa8d4e0b5 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -165,20 +165,34 @@ static int ap_configuration_available(void) return test_facility(12); } +/** + * ap_apft_available(): Test if AP facilities test (APFT) + * facility is available. + * + * Returns 1 if APFT is is available. + */ +static int ap_apft_available(void) +{ + return test_facility(15); +} + /** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number + * @tbit: Test facilities bit * @info: Pointer to queue descriptor * * Returns AP queue status structure. */ -static inline struct ap_queue_status -ap_test_queue(ap_qid_t qid, unsigned long *info) +struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info) { - if (test_facility(15)) - qid |= 1UL << 23; /* set APFT T bit*/ + if (tbit) + qid |= 1UL << 23; /* set T bit*/ return ap_tapq(qid, info); } +EXPORT_SYMBOL(ap_test_queue); static inline int ap_query_configuration(void) { @@ -261,7 +275,7 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type, if (!ap_test_config_card_id(AP_QID_CARD(qid))) return -ENODEV; - status = ap_test_queue(qid, &info); + status = ap_test_queue(qid, ap_apft_available(), &info); switch (status.response_code) { case AP_RESPONSE_NORMAL: *queue_depth = (int)(info & 0xff); @@ -940,7 +954,9 @@ static int ap_select_domain(void) for (j = 0; j < AP_DEVICES; j++) { if (!ap_test_config_card_id(j)) continue; - status = ap_test_queue(AP_MKQID(j, i), NULL); + status = ap_test_queue(AP_MKQID(j, i), + ap_apft_available(), + NULL); if (status.response_code != AP_RESPONSE_NORMAL) continue; count++; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 4dc7c88fb054..f07698d41f77 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -28,6 +28,7 @@ #include #include +#include #define AP_DEVICES 64 /* Number of AP devices. */ #define AP_DOMAINS 256 /* Number of AP domains. */ @@ -40,41 +41,6 @@ extern int ap_domain_index; extern spinlock_t ap_list_lock; extern struct list_head ap_card_list; -/** - * The ap_qid_t identifier of an ap queue. It contains a - * 6 bit card index and a 4 bit queue index (domain). - */ -typedef unsigned int ap_qid_t; - -#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255)) -#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63) -#define AP_QID_QUEUE(_qid) ((_qid) & 255) - -/** - * structy ap_queue_status - Holds the AP queue status. - * @queue_empty: Shows if queue is empty - * @replies_waiting: Waiting replies - * @queue_full: Is 1 if the queue is full - * @pad: A 4 bit pad - * @int_enabled: Shows if interrupts are enabled for the AP - * @response_code: Holds the 8 bit response code - * @pad2: A 16 bit pad - * - * The ap queue status word is returned by all three AP functions - * (PQAP, NQAP and DQAP). There's a set of flags in the first - * byte, followed by a 1 byte response code. - */ -struct ap_queue_status { - unsigned int queue_empty : 1; - unsigned int replies_waiting : 1; - unsigned int queue_full : 1; - unsigned int pad1 : 4; - unsigned int int_enabled : 1; - unsigned int response_code : 8; - unsigned int pad2 : 16; -} __packed; - - static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) { return (*ptr & (0x80000000u >> nr)) != 0; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 0f1a5d02acb0..c906fb73a215 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -362,7 +362,7 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq) /* Get the status with TAPQ */ status = ap_tapq(aq->qid, NULL); - if (status.int_enabled == 1) { + if (status.irq_enabled == 1) { /* Irqs are now enabled */ aq->interrupt = AP_INTR_ENABLED; aq->state = (aq->queue_count > 0) ? -- cgit v1.2.3 From 050349b5b71df52c24989037bd6515cb54c3ef35 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 8 Nov 2016 11:54:28 +0100 Subject: s390/zcrypt: externalize AP config info query KVM has a need to fetch the crypto configuration information as it is returned by the PQAP(QCI) instruction. This patch introduces a new API ap_query_configuration() which provides this info in a handy way for the caller. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ap.h | 26 ++++++++++++++++++++++++++ drivers/s390/crypto/ap_bus.c | 21 ++++++++++++++++----- drivers/s390/crypto/ap_bus.h | 11 ----------- 3 files changed, 42 insertions(+), 16 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 0f3e6f38db55..8cefd6ed981d 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -61,4 +61,30 @@ struct ap_queue_status ap_test_queue(ap_qid_t qid, int tbit, unsigned long *info); +struct ap_config_info { + unsigned int apsc : 1; /* S bit */ + unsigned int apxa : 1; /* N bit */ + unsigned int qact : 1; /* C bit */ + unsigned int rc8a : 1; /* R bit */ + unsigned char _reserved1 : 4; + unsigned char _reserved2[3]; + unsigned char Na; /* max # of APs - 1 */ + unsigned char Nd; /* max # of Domains - 1 */ + unsigned char _reserved3[10]; + unsigned int apm[8]; /* AP ID mask */ + unsigned int aqm[8]; /* AP queue mask */ + unsigned int adm[8]; /* AP domain mask */ + unsigned char _reserved4[16]; +} __aligned(8); + +/* + * ap_query_configuration(): Fetch cryptographic config info + * + * Returns the ap configuration info fetched via PQAP(QCI). + * On success 0 is returned, on failure a negative errno + * is returned, e.g. if the PQAP(QCI) instruction is not + * available, the return value will be -EOPNOTSUPP. + */ +int ap_query_configuration(struct ap_config_info *info); + #endif /* _ASM_S390_AP_H_ */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4fcfa8d4e0b5..5f0be2040272 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -194,12 +194,23 @@ struct ap_queue_status ap_test_queue(ap_qid_t qid, } EXPORT_SYMBOL(ap_test_queue); -static inline int ap_query_configuration(void) +/* + * ap_query_configuration(): Fetch cryptographic config info + * + * Returns the ap configuration info fetched via PQAP(QCI). + * On success 0 is returned, on failure a negative errno + * is returned, e.g. if the PQAP(QCI) instruction is not + * available, the return value will be -EOPNOTSUPP. + */ +int ap_query_configuration(struct ap_config_info *info) { - if (!ap_configuration) + if (!ap_configuration_available()) return -EOPNOTSUPP; - return ap_qci(ap_configuration); + if (!info) + return -EINVAL; + return ap_qci(info); } +EXPORT_SYMBOL(ap_query_configuration); /** * ap_init_configuration(): Allocate and query configuration array. @@ -212,7 +223,7 @@ static void ap_init_configuration(void) ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL); if (!ap_configuration) return; - if (ap_query_configuration() != 0) { + if (ap_query_configuration(ap_configuration) != 0) { kfree(ap_configuration); ap_configuration = NULL; return; @@ -1009,7 +1020,7 @@ static void ap_scan_bus(struct work_struct *unused) AP_DBF(DBF_DEBUG, "ap_scan_bus running\n"); - ap_query_configuration(); + ap_query_configuration(ap_configuration); if (ap_select_domain() != 0) goto out; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index f07698d41f77..754cf2223cfb 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -204,17 +204,6 @@ struct ap_message { struct ap_message *); }; -struct ap_config_info { - unsigned int special_command:1; - unsigned int ap_extended:1; - unsigned char reserved1:6; - unsigned char reserved2[15]; - unsigned int apm[8]; /* AP ID mask */ - unsigned int aqm[8]; /* AP queue mask */ - unsigned int adm[8]; /* AP domain mask */ - unsigned char reserved4[16]; -} __packed; - /** * ap_init_message() - Initialize ap_message. * Initialize a message before using. Otherwise this might result in -- cgit v1.2.3 From 46fde9a9d204e322cf4fda391c30213633dcc17f Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 9 Nov 2016 15:00:23 +0100 Subject: s390/zcrypt: externalize AP queue interrupt control KVM has a need to control the interrupts on real and virtualized AP queue devices. This fix provides a new function to control the interrupt facilities of an AP queue device. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ap.h | 36 ++++++++++++++++++++++++++++++++++++ drivers/s390/crypto/ap_asm.h | 9 ++++++--- drivers/s390/crypto/ap_queue.c | 24 +++++++++++++++++++++++- 3 files changed, 65 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 8cefd6ed981d..c02f4aba88a6 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -87,4 +87,40 @@ struct ap_config_info { */ int ap_query_configuration(struct ap_config_info *info); +/* + * struct ap_qirq_ctrl - convenient struct for easy invocation + * of the ap_queue_irq_ctrl() function. This struct is passed + * as GR1 parameter to the PQAP(AQIC) instruction. For details + * please see the AR documentation. + */ +struct ap_qirq_ctrl { + unsigned int _res1 : 8; + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int _res2 : 4; + unsigned int gisc : 3; /* guest isc field */ + unsigned int _res3 : 6; + unsigned int gf : 2; /* gisa format */ + unsigned int _res4 : 1; + unsigned int gisa : 27; /* gisa origin */ + unsigned int _res5 : 1; + unsigned int isc : 3; /* irq sub class */ +}; + +/** + * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * @qid: The AP queue number + * @qirqctrl: struct ap_qirq_ctrl, see above + * @ind: The notification indicator byte + * + * Returns AP queue status. + * + * Control interruption on the given AP queue. + * Just a simple wrapper function for the low level PQAP(AQIC) + * instruction available for other kernel modules. + */ +struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind); + #endif /* _ASM_S390_AP_H_ */ diff --git a/drivers/s390/crypto/ap_asm.h b/drivers/s390/crypto/ap_asm.h index 287b4ad0999e..cd350345b3d2 100644 --- a/drivers/s390/crypto/ap_asm.h +++ b/drivers/s390/crypto/ap_asm.h @@ -69,16 +69,19 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) } /** - * ap_aqic(): Enable interruption for a specific AP. + * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) * @ind: The notification indicator byte * * Returns AP queue status. */ -static inline struct ap_queue_status ap_aqic(ap_qid_t qid, void *ind) +static inline struct ap_queue_status ap_aqic(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) { register unsigned long reg0 asm ("0") = qid | (3UL << 24); - register unsigned long reg1_in asm ("1") = (8UL << 44) | AP_ISC; + register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; register struct ap_queue_status reg1_out asm ("1"); register void *reg2 asm ("2") = ind; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index c906fb73a215..56b96edffd5b 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -15,6 +15,25 @@ #include "ap_bus.h" #include "ap_asm.h" +/** + * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) + * @ind: The notification indicator byte + * + * Returns AP queue status. + * + * Control interruption on the given AP queue. + * Just a simple wrapper function for the low level PQAP(AQIC) + * instruction available for other kernel modules. + */ +struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) +{ + return ap_aqic(qid, qirqctrl, ind); +} +EXPORT_SYMBOL(ap_queue_irq_ctrl); + /** * ap_queue_enable_interruption(): Enable interruption on an AP queue. * @qid: The AP queue number @@ -27,8 +46,11 @@ static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind) { struct ap_queue_status status; + struct ap_qirq_ctrl qirqctrl = { 0 }; - status = ap_aqic(aq->qid, ind); + qirqctrl.ir = 1; + qirqctrl.isc = AP_ISC; + status = ap_aqic(aq->qid, qirqctrl, ind); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_OTHERWISE_CHANGED: -- cgit v1.2.3 From b3e5dc45fd1ec2aa1de6b80008f9295eb17e0659 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 16 Aug 2017 14:10:01 +0200 Subject: s390/mm: fix local TLB flushing vs. detach of an mm address space The local TLB flushing code keeps an additional mask in the mm.context, the cpu_attach_mask. At the time a global flush of an address space is done the cpu_attach_mask is copied to the mm_cpumask in order to avoid future global flushes in case the mm is used by a single CPU only after the flush. Trouble is that the reset of the mm_cpumask is racy against the detach of an mm address space by switch_mm. The current order is first the global TLB flush and then the copy of the cpu_attach_mask to the mm_cpumask. The order needs to be the other way around. Cc: Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 4 ++-- arch/s390/include/asm/tlbflush.h | 26 +++++--------------------- 2 files changed, 7 insertions(+), 23 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 72e9ca83a668..8823e35f69a9 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -103,7 +103,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (prev == next) return; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - cpumask_set_cpu(cpu, mm_cpumask(next)); /* Clear old ASCE by loading the kernel ASCE. */ __ctl_load(S390_lowcore.kernel_asce, 1, 1); __ctl_load(S390_lowcore.kernel_asce, 7, 7); @@ -121,7 +120,7 @@ static inline void finish_arch_post_lock_switch(void) preempt_disable(); while (atomic_read(&mm->context.flush_count)) cpu_relax(); - + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); if (mm->context.flush_mm) __tlb_flush_mm(mm); preempt_enable(); @@ -136,6 +135,7 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { switch_mm(prev, next, current); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); set_user_asce(next); } diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 4d759f8f4bc7..16fe2a3d9a03 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -48,23 +48,6 @@ static inline void __tlb_flush_global(void) * Flush TLB entries for a specific mm on all CPUs (in case gmap is used * this implicates multiple ASCEs!). */ -static inline void __tlb_flush_full(struct mm_struct *mm) -{ - preempt_disable(); - atomic_inc(&mm->context.flush_count); - if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - /* Local TLB flush */ - __tlb_flush_local(); - } else { - /* Global TLB flush */ - __tlb_flush_global(); - /* Reset TLB flush mask */ - cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); - } - atomic_dec(&mm->context.flush_count); - preempt_enable(); -} - static inline void __tlb_flush_mm(struct mm_struct *mm) { unsigned long gmap_asce; @@ -76,16 +59,18 @@ static inline void __tlb_flush_mm(struct mm_struct *mm) */ preempt_disable(); atomic_inc(&mm->context.flush_count); + /* Reset TLB flush mask */ + cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); + barrier(); gmap_asce = READ_ONCE(mm->context.gmap_asce); if (MACHINE_HAS_IDTE && gmap_asce != -1UL) { if (gmap_asce) __tlb_flush_idte(gmap_asce); __tlb_flush_idte(mm->context.asce); } else { - __tlb_flush_full(mm); + /* Global TLB flush */ + __tlb_flush_global(); } - /* Reset TLB flush mask */ - cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask); atomic_dec(&mm->context.flush_count); preempt_enable(); } @@ -99,7 +84,6 @@ static inline void __tlb_flush_kernel(void) } #else #define __tlb_flush_global() __tlb_flush_local() -#define __tlb_flush_full(mm) __tlb_flush_local() /* * Flush TLB entries for a specific ASCE on all CPUs. -- cgit v1.2.3 From 60f07c8ec5fae06c23e9fd7bab67dabce92b3414 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 17 Aug 2017 08:15:16 +0200 Subject: s390/mm: fix race on mm->context.flush_mm The order in __tlb_flush_mm_lazy is to flush TLB first and then clear the mm->context.flush_mm bit. This can lead to missed flushes as the bit can be set anytime, the order needs to be the other way aronud. But this leads to a different race, __tlb_flush_mm_lazy may be called on two CPUs concurrently. If mm->context.flush_mm is cleared first then another CPU can bypass __tlb_flush_mm_lazy although the first CPU has not done the flush yet. In a virtualized environment the time until the flush is finally completed can be arbitrarily long. Add a spinlock to serialize __tlb_flush_mm_lazy and use the function in finish_arch_post_lock_switch as well. Cc: Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 ++ arch/s390/include/asm/mmu_context.h | 4 ++-- arch/s390/include/asm/tlbflush.h | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bd6f30304518..3525fe6e7e4c 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,6 +5,7 @@ #include typedef struct { + spinlock_t lock; cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; @@ -27,6 +28,7 @@ typedef struct { } mm_context_t; #define INIT_MM_CONTEXT(name) \ + .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ .context.pgtable_lock = \ __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8823e35f69a9..484efe8f4234 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -17,6 +17,7 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + spin_lock_init(&mm->context.lock); spin_lock_init(&mm->context.pgtable_lock); INIT_LIST_HEAD(&mm->context.pgtable_list); spin_lock_init(&mm->context.gmap_lock); @@ -121,8 +122,7 @@ static inline void finish_arch_post_lock_switch(void) while (atomic_read(&mm->context.flush_count)) cpu_relax(); cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - if (mm->context.flush_mm) - __tlb_flush_mm(mm); + __tlb_flush_mm_lazy(mm); preempt_enable(); } set_fs(current->thread.mm_segment); diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 16fe2a3d9a03..b08d5bc2666e 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -101,10 +101,12 @@ static inline void __tlb_flush_kernel(void) static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { + spin_lock(&mm->context.lock); if (mm->context.flush_mm) { - __tlb_flush_mm(mm); mm->context.flush_mm = 0; + __tlb_flush_mm(mm); } + spin_unlock(&mm->context.lock); } /* -- cgit v1.2.3 From f28a4b4ddf8e7181c6c0bc45603d65c4ab6b14f9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 17 Aug 2017 18:17:49 +0200 Subject: s390/mm: use a single lock for the fields in mm_context_t The three locks 'lock', 'pgtable_lock' and 'gmap_lock' in the mm_context_t can be reduced to a single lock. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 5 ----- arch/s390/include/asm/mmu_context.h | 2 -- arch/s390/mm/gmap.c | 8 ++++---- arch/s390/mm/pgalloc.c | 16 ++++++++-------- 4 files changed, 12 insertions(+), 19 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 3525fe6e7e4c..3f46a6577b8d 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -9,9 +9,7 @@ typedef struct { cpumask_t cpu_attach_mask; atomic_t flush_count; unsigned int flush_mm; - spinlock_t pgtable_lock; struct list_head pgtable_list; - spinlock_t gmap_lock; struct list_head gmap_list; unsigned long gmap_asce; unsigned long asce; @@ -29,10 +27,7 @@ typedef struct { #define INIT_MM_CONTEXT(name) \ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \ - .context.pgtable_lock = \ - __SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \ - .context.gmap_lock = __SPIN_LOCK_UNLOCKED(name.context.gmap_lock), \ .context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list), static inline int tprot(unsigned long addr) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 484efe8f4234..3c9abedc323c 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -18,9 +18,7 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { spin_lock_init(&mm->context.lock); - spin_lock_init(&mm->context.pgtable_lock); INIT_LIST_HEAD(&mm->context.pgtable_list); - spin_lock_init(&mm->context.gmap_lock); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 9e1494e3d849..2f66290c9b92 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -100,14 +100,14 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit) if (!gmap) return NULL; gmap->mm = mm; - spin_lock(&mm->context.gmap_lock); + spin_lock(&mm->context.lock); list_add_rcu(&gmap->list, &mm->context.gmap_list); if (list_is_singular(&mm->context.gmap_list)) gmap_asce = gmap->asce; else gmap_asce = -1UL; WRITE_ONCE(mm->context.gmap_asce, gmap_asce); - spin_unlock(&mm->context.gmap_lock); + spin_unlock(&mm->context.lock); return gmap; } EXPORT_SYMBOL_GPL(gmap_create); @@ -248,7 +248,7 @@ void gmap_remove(struct gmap *gmap) spin_unlock(&gmap->shadow_lock); } /* Remove gmap from the pre-mm list */ - spin_lock(&gmap->mm->context.gmap_lock); + spin_lock(&gmap->mm->context.lock); list_del_rcu(&gmap->list); if (list_empty(&gmap->mm->context.gmap_list)) gmap_asce = 0; @@ -258,7 +258,7 @@ void gmap_remove(struct gmap *gmap) else gmap_asce = -1UL; WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce); - spin_unlock(&gmap->mm->context.gmap_lock); + spin_unlock(&gmap->mm->context.lock); synchronize_rcu(); /* Put reference */ gmap_put(gmap); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 05b5b1b0a8d9..05f1f27e6708 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -188,7 +188,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) /* Try to get a fragment of a 4K page as a 2K page table */ if (!mm_alloc_pgste(mm)) { table = NULL; - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, struct page, lru); @@ -203,7 +203,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) list_del(&page->lru); } } - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); if (table) return table; } @@ -227,9 +227,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm) /* Return the first 2K fragment of the page */ atomic_set(&page->_mapcount, 1); clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); } return table; } @@ -243,13 +243,13 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) if (!mm_alloc_pgste(mm)) { /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_mapcount, 1U << bit); if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); if (mask != 0) return; } @@ -275,13 +275,13 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, return; } bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.pgtable_lock); + spin_lock_bh(&mm->context.lock); mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); if (mask & 3) list_add_tail(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); - spin_unlock_bh(&mm->context.pgtable_lock); + spin_unlock_bh(&mm->context.lock); table = (unsigned long *) (__pa(table) | (1U << bit)); tlb_remove_table(tlb, table); } -- cgit v1.2.3