From 3bd5371a4da68613fb3d4aaf961ed8244bcbd741 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 10 Dec 2020 20:26:00 +0100
Subject: xen/events: Remove unused bind_evtchn_to_irq_lateeoi()

Signed-off-by: Thomas Gleixner
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Stefano Stabellini
Link: https://lore.kernel.org/r/20201210194044.972064156@linutronix.de
---
 drivers/xen/events/events_base.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 6038c4c35db5..d6458e439c78 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1132,12 +1132,6 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn)
 }
 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
 
-int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
-{
-	return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
-
 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 {
 	struct evtchn_bind_ipi bind_ipi;
--
cgit v1.2.3

From 67473b8194bc3ecc42d60a4f5dc1ed479f28ed6e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 10 Dec 2020 20:26:01 +0100
Subject: xen/events: Remove defunct affinity spreading

This function can only ever work when the event channels:

  - are already established,
  - have interrupts assigned to them, and
  - have had the affinity set by user space already,

because any newly set up event channel is forced to be bound to CPU0 and
the affinity mask of the interrupt is forced to contain cpumask_of(0).

As the CPU0 enforcement was in place _before_ this was implemented, it's
entirely unclear how that can ever have worked at all.

Remove it as preparation for doing it properly.

Signed-off-by: Thomas Gleixner
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Stefano Stabellini
Link: https://lore.kernel.org/r/20201210194045.065115500@linutronix.de
---
 drivers/xen/events/events_base.c |  9 ---------
 drivers/xen/evtchn.c             | 34 +---------------------------------
 2 files changed, 1 insertion(+), 42 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index d6458e439c78..9cade1994785 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1696,15 +1696,6 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 	return ret;
 }
 
-/* To be called with desc->lock held. */
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu)
-{
-	struct irq_data *d = irq_desc_get_irq_data(desc);
-
-	return set_affinity_irq(d, cpumask_of(tcpu), false);
-}
-EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn);
-
 static void enable_dynirq(struct irq_data *data)
 {
 	evtchn_port_t evtchn = evtchn_from_irq(data->irq);
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5dc016d68f83..a7a85719a8c8 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -421,36 +421,6 @@ static void evtchn_unbind_from_user(struct per_user_data *u,
 	del_evtchn(u, evtchn);
 }
 
-static DEFINE_PER_CPU(int, bind_last_selected_cpu);
-
-static void evtchn_bind_interdom_next_vcpu(evtchn_port_t evtchn)
-{
-	unsigned int selected_cpu, irq;
-	struct irq_desc *desc;
-	unsigned long flags;
-
-	irq = irq_from_evtchn(evtchn);
-	desc = irq_to_desc(irq);
-
-	if (!desc)
-		return;
-
-	raw_spin_lock_irqsave(&desc->lock, flags);
-	selected_cpu = this_cpu_read(bind_last_selected_cpu);
-	selected_cpu = cpumask_next_and(selected_cpu,
-			desc->irq_common_data.affinity, cpu_online_mask);
-
-	if (unlikely(selected_cpu >= nr_cpu_ids))
-		selected_cpu = cpumask_first_and(desc->irq_common_data.affinity,
-				cpu_online_mask);
-
-	this_cpu_write(bind_last_selected_cpu, selected_cpu);
-
-	/* unmask expects irqs to be disabled */
-	xen_set_affinity_evtchn(desc, selected_cpu);
-	raw_spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 static long evtchn_ioctl(struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
@@ -508,10 +478,8 @@ static long evtchn_ioctl(struct file *file,
 			break;
 
 		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
-		if (rc == 0) {
+		if (rc == 0)
 			rc = bind_interdomain.local_port;
-			evtchn_bind_interdom_next_vcpu(rc);
-		}
 		break;
 	}
 
--
cgit v1.2.3
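
For illustration, the dead round-robin walk removed above can be modeled
outside the kernel. The following is a plain C sketch, not kernel API:
NR_CPUS, next_cpu_in_mask() and the plain bitmask stand in for nr_cpu_ids,
cpumask_next_and() and struct cpumask. It shows why the spreading could
never take effect: with the affinity mask forced to cpumask_of(0) at bind
time, the walk has exactly one bit to find.

    #include <stdio.h>

    #define NR_CPUS 8

    /* Model of cpumask_next_and(): next set bit after @prev, wrapping. */
    static int next_cpu_in_mask(unsigned int mask, int prev)
    {
            int cpu;

            for (cpu = prev + 1; cpu < NR_CPUS; cpu++)
                    if (mask & (1u << cpu))
                            return cpu;
            for (cpu = 0; cpu <= prev && cpu < NR_CPUS; cpu++)
                    if (mask & (1u << cpu))
                            return cpu;
            return -1;
    }

    int main(void)
    {
            unsigned int affinity = 0x1;    /* bind forces the mask to CPU0 */
            int last = 0, i;

            for (i = 0; i < 4; i++) {
                    last = next_cpu_in_mask(affinity, last);
                    printf("event %d -> cpu %d\n", i, last);
            }
            return 0;       /* prints cpu 0 every time */
    }
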
From 1ca1b4e2c0cbc88ce3939910ac36dca51d326fe4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 10 Dec 2020 20:26:02 +0100
Subject: xen/events: Use immediate affinity setting

There is absolutely no reason to mimic the x86 deferred affinity
setting. That mechanism is required to handle the hardware-induced
issues of IO/APIC and MSI and is not in use when the interrupts are
remapped.

XEN does not need this and can simply change the affinity from the
calling context. The core code invokes this with the interrupt
descriptor lock held, so it is fully serialized against any other
operation.

Mark the interrupts with IRQ_MOVE_PCNTXT to disable the deferred
affinity setting. The conditional mask/unmask operation is already
handled in xen_rebind_evtchn_to_cpu().

This makes XEN on x86 use the same mechanics as on e.g. ARM64, where
deferred affinity setting is neither required nor implemented and the
code path in the ack functions is compiled out.

Signed-off-by: Thomas Gleixner
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Stefano Stabellini
Link: https://lore.kernel.org/r/20201210194045.157601122@linutronix.de
---
 drivers/xen/events/events_base.c | 35 +++++++++--------------------------
 1 file changed, 9 insertions(+), 26 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 9cade1994785..eaba42a89c8f 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -628,6 +628,11 @@ static void xen_irq_init(unsigned irq)
 	info->refcnt = -1;
 
 	set_info_for_irq(irq, info);
+	/*
+	 * Interrupt affinity setting can be immediate. No point
+	 * in delaying it until an interrupt is handled.
+	 */
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 
 	INIT_LIST_HEAD(&info->eoi_list);
 	list_add_tail(&info->list, &xen_irq_list_head);
@@ -739,18 +744,7 @@ static void eoi_pirq(struct irq_data *data)
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
-	if (unlikely(irqd_is_setaffinity_pending(data)) &&
-	    likely(!irqd_irq_disabled(data))) {
-		int masked = test_and_set_mask(evtchn);
-
-		clear_evtchn(evtchn);
-
-		irq_move_masked_irq(data);
-
-		if (!masked)
-			unmask_evtchn(evtchn);
-	} else
-		clear_evtchn(evtchn);
+	clear_evtchn(evtchn);
 
 	if (pirq_needs_eoi(data->irq)) {
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@ -1641,7 +1635,6 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
 	mutex_unlock(&irq_mapping_update_lock);
 
 	bind_evtchn_to_cpu(evtchn, info->cpu);
-	/* This will be deferred until interrupt is processed */
 	irq_set_affinity(irq, cpumask_of(info->cpu));
 
 	/* Unmask the event channel. */
@@ -1688,8 +1681,9 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 			    bool force)
 {
 	unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
-	int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+	int ret;
 
+	ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
 	if (!ret)
 		irq_data_update_effective_affinity(data, cpumask_of(tcpu));
 
@@ -1719,18 +1713,7 @@ static void ack_dynirq(struct irq_data *data)
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
-	if (unlikely(irqd_is_setaffinity_pending(data)) &&
-	    likely(!irqd_irq_disabled(data))) {
-		int masked = test_and_set_mask(evtchn);
-
-		clear_evtchn(evtchn);
-
-		irq_move_masked_irq(data);
-
-		if (!masked)
-			unmask_evtchn(evtchn);
-	} else
-		clear_evtchn(evtchn);
+	clear_evtchn(evtchn);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
--
cgit v1.2.3
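
A simplified model of the decision IRQ_MOVE_PCNTXT controls may help here.
This is a sketch of the generic behavior, not the kernel's actual code, and
the structure and function names are invented for the example. With the
flag set, a new affinity mask takes effect immediately under the descriptor
lock; without it, the request is parked and replayed from the ack path,
which is the irq_move_masked_irq() dance deleted from eoi_pirq() and
ack_dynirq() above.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy descriptor; illustrative fields, not kernel structures. */
    struct irq_model {
            bool move_pcntxt;       /* models IRQ_MOVE_PCNTXT */
            bool move_pending;
            unsigned int affinity;  /* current target CPU mask */
            unsigned int pending;   /* parked request */
    };

    /* Models the core affinity request, made with the desc lock held. */
    static void request_affinity(struct irq_model *d, unsigned int mask)
    {
            if (d->move_pcntxt) {
                    d->affinity = mask;     /* immediate: the XEN case now */
            } else {
                    d->pending = mask;      /* deferred to the next ack */
                    d->move_pending = true;
            }
    }

    /* Models the ack path that the removed code implemented. */
    static void ack(struct irq_model *d)
    {
            if (d->move_pending) {
                    d->affinity = d->pending;
                    d->move_pending = false;
            }
    }

    int main(void)
    {
            struct irq_model d = { .move_pcntxt = true, .affinity = 0x1 };

            request_affinity(&d, 0x4);
            printf("after request: %#x\n", d.affinity); /* 0x4 already */
            ack(&d);
            printf("after ack:     %#x\n", d.affinity); /* unchanged */
            return 0;
    }
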
From f7a6f994b4f0ee69c656dda3da11431d92d6b08f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 10 Dec 2020 20:26:03 +0100
Subject: xen/events: Only force affinity mask for percpu interrupts

All event channel setups bind the interrupt to CPU0, or to the target CPU
for percpu interrupts, and overwrite the affinity mask with the
corresponding cpumask. That does not make sense.

The XEN implementation of irqchip::irq_set_affinity() already picks a
single target CPU out of the affinity mask, and the actual target is
stored in the effective CPU mask, so destroying the user-chosen affinity
mask, which might contain more than one CPU, is wrong.

Change the implementation so that the channel is bound to CPU0 at the XEN
level and leave the affinity mask alone. At startup of the interrupt, the
affinity will be assigned out of the affinity mask and the XEN binding
will be updated. Only keep the enforcement for real percpu interrupts.

On resume the overwrite is not required either because info->cpu and the
affinity mask are still the same as at the time of suspend. Same for
rebind_evtchn_irq().

This also prepares for proper interrupt spreading.

Signed-off-by: Thomas Gleixner
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Stefano Stabellini
Link: https://lore.kernel.org/r/20201210194045.250321315@linutronix.de
---
 drivers/xen/events/events_base.c | 48 ++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 14 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index eaba42a89c8f..679b2cbd2de4 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -433,15 +433,20 @@ static bool pirq_needs_eoi_flag(unsigned irq)
 	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 }
 
-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu)
+static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+			       bool force_affinity)
 {
 	int irq = get_evtchn_to_irq(evtchn);
 	struct irq_info *info = info_for_irq(irq);
 
 	BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
-	cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
-#endif
+
+	if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
+		cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
+		cpumask_copy(irq_get_effective_affinity_mask(irq),
+			     cpumask_of(cpu));
+	}
+
 	xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
 
 	info->cpu = cpu;
@@ -788,7 +793,7 @@ static unsigned int __startup_pirq(unsigned int irq)
 		goto err;
 
 	info->evtchn = evtchn;
-	bind_evtchn_to_cpu(evtchn, 0);
+	bind_evtchn_to_cpu(evtchn, 0, false);
 
 	rc = xen_evtchn_port_setup(evtchn);
 	if (rc)
@@ -1107,8 +1112,14 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
 			irq = ret;
 			goto out;
 		}
-		/* New interdomain events are bound to VCPU 0. */
-		bind_evtchn_to_cpu(evtchn, 0);
+		/*
+		 * New interdomain events are initially bound to vCPU0. This
+		 * is required to setup the event channel in the first
+		 * place and also important for UP guests because the
+		 * affinity setting is not invoked on them so nothing would
+		 * bind the channel.
+		 */
+		bind_evtchn_to_cpu(evtchn, 0, false);
 	} else {
 		struct irq_info *info = info_for_irq(irq);
 		WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -1156,7 +1167,11 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 			irq = ret;
 			goto out;
 		}
-		bind_evtchn_to_cpu(evtchn, cpu);
+		/*
+		 * Force the affinity mask to the target CPU so proc shows
+		 * the correct target.
+		 */
+		bind_evtchn_to_cpu(evtchn, cpu, true);
 	} else {
 		struct irq_info *info = info_for_irq(irq);
 		WARN_ON(info == NULL || info->type != IRQT_IPI);
@@ -1269,7 +1284,11 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
 			goto out;
 		}
 
-		bind_evtchn_to_cpu(evtchn, cpu);
+		/*
+		 * Force the affinity mask for percpu interrupts so proc
+		 * shows the correct target.
+		 */
+		bind_evtchn_to_cpu(evtchn, cpu, percpu);
 	} else {
 		struct irq_info *info = info_for_irq(irq);
 		WARN_ON(info == NULL || info->type != IRQT_VIRQ);
@@ -1634,8 +1653,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
 
 	mutex_unlock(&irq_mapping_update_lock);
 
-	bind_evtchn_to_cpu(evtchn, info->cpu);
-	irq_set_affinity(irq, cpumask_of(info->cpu));
+	bind_evtchn_to_cpu(evtchn, info->cpu, false);
 
 	/* Unmask the event channel. */
 	enable_irq(irq);
@@ -1669,7 +1687,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
 	 * it, but don't do the xenlinux-level rebind in that case.
 	 */
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
-		bind_evtchn_to_cpu(evtchn, tcpu);
+		bind_evtchn_to_cpu(evtchn, tcpu, false);
 
 	if (!masked)
 		unmask_evtchn(evtchn);
@@ -1798,7 +1816,8 @@ static void restore_cpu_virqs(unsigned int cpu)
 
 		/* Record the new mapping. */
 		(void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
-		bind_evtchn_to_cpu(evtchn, cpu);
+		/* The affinity mask is still valid */
+		bind_evtchn_to_cpu(evtchn, cpu, false);
 	}
 }
 
@@ -1823,7 +1842,8 @@ static void restore_cpu_ipis(unsigned int cpu)
 
 		/* Record the new mapping. */
 		(void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
-		bind_evtchn_to_cpu(evtchn, cpu);
+		/* The affinity mask is still valid */
+		bind_evtchn_to_cpu(evtchn, cpu, false);
 	}
 }
 
--
cgit v1.2.3
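
The distinction this change relies on, between the user's affinity mask and
the effective affinity, can be sketched with a toy model. This is
illustrative C only, with invented names; in the kernel the chosen target
is recorded via irq_data_update_effective_affinity() while the requested
mask stays untouched.

    #include <stdio.h>

    struct irq_model {
            unsigned int affinity;  /* user chosen, may span several CPUs */
            unsigned int effective; /* the single CPU actually bound */
    };

    /* Model: keep the full request, bind to its lowest CPU. */
    static void set_affinity(struct irq_model *irq, unsigned int dest)
    {
            irq->affinity = dest;           /* preserved, not overwritten */
            irq->effective = dest & -dest;  /* lowest set bit = one CPU */
    }

    int main(void)
    {
            struct irq_model irq = { 0, 0 };

            set_affinity(&irq, 0x0c);       /* user allows CPU2 and CPU3 */
            printf("affinity=%#x effective=%#x\n",
                   irq.affinity, irq.effective);
            /* affinity=0xc effective=0x4: the user's choice survives */
            return 0;
    }
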
From 62ebcda8a8dfa4aeaa3288020a082787910afebc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 10 Dec 2020 20:26:04 +0100
Subject: xen/events: Reduce irq_info::spurious_cnt storage size

To prepare for interrupt spreading, reduce the storage size of
irq_info::spurious_cnt to u8 so the required flag for the spreading
logic will not increase the storage size. Protect the usage site
against overruns.

Signed-off-by: Thomas Gleixner
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Stefano Stabellini
Link: https://lore.kernel.org/r/20201210194045.360198201@linutronix.de
---
 drivers/xen/events/events_base.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 679b2cbd2de4..b352440963ee 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -95,7 +95,7 @@ struct irq_info {
 	struct list_head list;
 	struct list_head eoi_list;
 	short refcnt;
-	short spurious_cnt;
+	u8 spurious_cnt;
 	enum xen_irq_type type;	/* type */
 	unsigned irq;
 	evtchn_port_t evtchn;	/* event channel */
@@ -528,8 +528,10 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
 		return;
 
 	if (spurious) {
-		if ((1 << info->spurious_cnt) < (HZ << 2))
-			info->spurious_cnt++;
+		if ((1 << info->spurious_cnt) < (HZ << 2)) {
+			if (info->spurious_cnt != 0xFF)
+				info->spurious_cnt++;
+		}
 		if (info->spurious_cnt > 1) {
 			delay = 1 << (info->spurious_cnt - 2);
 			if (delay > HZ)
--
cgit v1.2.3
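
A worked model of the backoff above shows why the clamp matters: the delay
is derived by shifting, so letting the u8 counter wrap would snap the delay
back from its HZ cap to a short value. The snippet mirrors the patched
logic; the HZ value is chosen arbitrarily for illustration.

    #include <stdio.h>

    #define HZ 250

    int main(void)
    {
            unsigned char spurious_cnt = 0; /* the new u8 counter */
            int event;

            for (event = 1; event <= 8; event++) {
                    unsigned int delay = 0;

                    /* Saturate instead of wrapping the u8. */
                    if ((1 << spurious_cnt) < (HZ << 2)) {
                            if (spurious_cnt != 0xFF)
                                    spurious_cnt++;
                    }
                    /* Exponential delay, capped at one second (HZ). */
                    if (spurious_cnt > 1) {
                            delay = 1 << (spurious_cnt - 2);
                            if (delay > HZ)
                                    delay = HZ;
                    }
                    printf("spurious event %d: cnt=%u delay=%u jiffies\n",
                           event, spurious_cnt, delay);
            }
            return 0;
    }
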
From 88f0a9d066443118261adf7e049781476f09dac1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 10 Dec 2020 20:26:05 +0100
Subject: xen/events: Implement irq distribution

Keep track of the assignments of event channels to CPUs and select the
online CPU with the fewest assigned channels in the affinity mask which
is handed to irq_chip::irq_set_affinity() from the core code.

Signed-off-by: Thomas Gleixner
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Stefano Stabellini
Link: https://lore.kernel.org/r/20201210194045.457218278@linutronix.de
---
 drivers/xen/events/events_base.c | 76 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 8 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b352440963ee..a8030332a191 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -96,6 +96,7 @@ struct irq_info {
 	struct list_head eoi_list;
 	short refcnt;
 	u8 spurious_cnt;
+	u8 is_accounted;
 	enum xen_irq_type type;	/* type */
 	unsigned irq;
 	evtchn_port_t evtchn;	/* event channel */
@@ -161,6 +162,9 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 /* IRQ <-> IPI mapping */
 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
 
+/* Event channel distribution data */
+static atomic_t channels_on_cpu[NR_CPUS];
+
 static int **evtchn_to_irq;
 #ifdef CONFIG_X86
 static unsigned long *pirq_eoi_map;
@@ -257,6 +261,32 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info)
 	irq_set_chip_data(irq, info);
 }
 
+/* Per CPU channel accounting */
+static void channels_on_cpu_dec(struct irq_info *info)
+{
+	if (!info->is_accounted)
+		return;
+
+	info->is_accounted = 0;
+
+	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+		return;
+
+	WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
+}
+
+static void channels_on_cpu_inc(struct irq_info *info)
+{
+	if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
+		return;
+
+	if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
+					    INT_MAX)))
+		return;
+
+	info->is_accounted = 1;
+}
+
 /* Constructors for packed IRQ information. */
 static int xen_irq_info_common_setup(struct irq_info *info,
 				     unsigned irq,
@@ -339,6 +369,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
 {
 	set_evtchn_to_irq(info->evtchn, -1);
 	info->evtchn = 0;
+	channels_on_cpu_dec(info);
 }
 
 /*
@@ -449,7 +480,9 @@ static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
 
 	xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
 
+	channels_on_cpu_dec(info);
 	info->cpu = cpu;
+	channels_on_cpu_inc(info);
 }
 
 /**
@@ -622,11 +655,6 @@ static void xen_irq_init(unsigned irq)
 {
 	struct irq_info *info;
 
-#ifdef CONFIG_SMP
-	/* By default all event channels notify CPU#0. */
-	cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
-#endif
-
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (info == NULL)
 		panic("Unable to allocate metadata for IRQ%d\n", irq);
@@ -1697,10 +1725,38 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
 	return 0;
 }
 
+/*
+ * Find the CPU within @dest mask which has the least number of channels
+ * assigned. This is not precise as the per cpu counts can be modified
+ * concurrently.
+ */
+static unsigned int select_target_cpu(const struct cpumask *dest)
+{
+	unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
+
+	for_each_cpu_and(cpu, dest, cpu_online_mask) {
+		unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
+
+		if (curch < minch) {
+			minch = curch;
+			best_cpu = cpu;
+		}
+	}
+
+	/*
+	 * Catch the unlikely case that dest contains no online CPUs. Can't
+	 * recurse.
+	 */
+	if (best_cpu == UINT_MAX)
+		return select_target_cpu(cpu_online_mask);
+
+	return best_cpu;
+}
+
 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 			    bool force)
 {
-	unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
+	unsigned int tcpu = select_target_cpu(dest);
 	int ret;
 
 	ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
@@ -1928,8 +1984,12 @@ void xen_irq_resume(void)
 	xen_evtchn_resume();
 
 	/* No IRQ <-> event-channel mappings. */
-	list_for_each_entry(info, &xen_irq_list_head, list)
-		info->evtchn = 0; /* zap event-channel binding */
+	list_for_each_entry(info, &xen_irq_list_head, list) {
+		/* Zap event-channel binding */
+		info->evtchn = 0;
+		/* Adjust accounting */
+		channels_on_cpu_dec(info);
+	}
 
 	clear_evtchn_to_irq_all();
 
--
cgit v1.2.3
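
To close, the selection policy of select_target_cpu() can be modeled in
userspace. Plain arrays and bitmasks stand in for the kernel's atomic
counters and cpumasks, and the CPU count and channel numbers are made up
for the example.

    #include <limits.h>
    #include <stdio.h>

    #define NR_CPUS 4

    static unsigned int channels_on_cpu[NR_CPUS] = { 5, 2, 7, 2 };
    static unsigned int online_mask = 0xf;  /* all four CPUs online */

    /* Pick the online CPU in @dest with the fewest assigned channels. */
    static unsigned int select_target_cpu(unsigned int dest)
    {
            unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;

            for (cpu = 0; cpu < NR_CPUS; cpu++) {
                    if (!(dest & online_mask & (1u << cpu)))
                            continue;
                    if (channels_on_cpu[cpu] < minch) {
                            minch = channels_on_cpu[cpu];
                            best_cpu = cpu;
                    }
            }

            /* No online CPU in dest: retry against all online CPUs. */
            if (best_cpu == UINT_MAX)
                    return select_target_cpu(online_mask);

            return best_cpu;
    }

    int main(void)
    {
            unsigned int cpu = select_target_cpu(0x6); /* CPU1 or CPU2 */

            channels_on_cpu[cpu]++;         /* mimic channels_on_cpu_inc() */
            printf("bound to cpu %u\n", cpu);       /* CPU1: fewest channels */
            return 0;
    }
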