From 893fbfff976cd069f2e60c3b186dbe3f85504db2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:49 +0100 Subject: irqchip/ls-scfg-msi: Fix MSI affinity handling The ls-scfs-msi driver is not dealing with the effective affinity as it should. Let's fix that, and make it clear that the effective affinity is restricted to a single CPU. Also prevent the driver from messing with the internals of the affinity setting infrastructure. Reported-by: Alexandre Belloni Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Alexandre Belloni Cc: Jason Cooper Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-3-marc.zyngier@arm.com --- drivers/irqchip/irq-ls-scfg-msi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 1ec3bfe56693..c671b3212010 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -93,8 +93,12 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) msg->address_lo = lower_32_bits(msi_data->msiir_addr); msg->data = data->hwirq; - if (msi_affinity_flag) - msg->data |= cpumask_first(data->common->affinity); + if (msi_affinity_flag) { + const struct cpumask *mask; + + mask = irq_data_get_effective_affinity_mask(data); + msg->data |= cpumask_first(mask); + } iommu_dma_map_msi_msg(data->irq, msg); } @@ -121,7 +125,7 @@ static int ls_scfg_msi_set_affinity(struct irq_data *irq_data, return -EINVAL; } - cpumask_copy(irq_data->common->affinity, mask); + irq_data_update_effective_affinity(irq_data, cpumask_of(cpu)); return IRQ_SET_MASK_OK; } -- cgit v1.2.3 From cbaf45a6be497c272e80500e4fd9bccdf20d5050 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:50 +0100 Subject: irqchip/gic-v2m: Fix SPI release on error path On failing to allocate the required SPIs, the actual number of interrupts should be freed and not its log2 value. Fixes: de337ee30142 ("irqchip/gic-v2m: Add PCI Multi-MSI support") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-4-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 0f52d44b3f69..f5fe0100f9ff 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -199,7 +199,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, fail: irq_domain_free_irqs_parent(domain, virq, nr_irqs); - gicv2m_unalloc_msi(v2m, hwirq, get_count_order(nr_irqs)); + gicv2m_unalloc_msi(v2m, hwirq, nr_irqs); return err; } -- cgit v1.2.3 From c1797b11a09c8323c92b074fd48b89a936c991d0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 22 Jun 2018 10:52:51 +0100 Subject: irqchip/gic-v3-its: Don't bind LPI to unavailable NUMA node On a NUMA system, if an ITS is local to an offline node, the ITS driver may pick an offline CPU to bind the LPI. In this case, pick an online CPU (and the first one will do). But on some systems, binding an LPI to non-local node CPU may cause deadlock (see Cavium erratum 23144). In this case, just fail the activate and return an error code. Signed-off-by: Yang Yingliang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Sumit Garg Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180622095254.5906-5-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5377d7e2afba..cae53937feeb 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2310,7 +2310,14 @@ static int its_irq_domain_activate(struct irq_domain *domain, cpu_mask = cpumask_of_node(its_dev->its->numa_node); /* Bind the LPI to the first possible CPU */ - cpu = cpumask_first(cpu_mask); + cpu = cpumask_first_and(cpu_mask, cpu_online_mask); + if (cpu >= nr_cpu_ids) { + if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) + return -EINVAL; + + cpu = cpumask_first(cpu_online_mask); + } + its_dev->event_map.col_map[event] = cpu; irq_data_update_effective_affinity(d, cpumask_of(cpu)); -- cgit v1.2.3 From 83559b47cdc4d396fc1187a13b527d01b55e0fe6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:52 +0100 Subject: irqchip/gic-v3-its: Only emit SYNC if targetting a valid collection It is possible, under obscure circumstances, to convince the ITS driver to emit a SYNC operation that targets a collection that is not bound to any redistributor (and the target_address field is zero) because the corresponding CPU has not been seen yet (the system has been booted with max_cpus="something small"). If the ITS is using the linear CPU number as the target, this is not a big deal, as we just end-up issuing a SYNC to CPU0. But if the ITS requires the physical address of the redistributor (with GITS_TYPER.PTA==1), we end-up asking the ITS to write to the physical address zero, which is not exactly a good idea (there has been report of the ITS locking up). This should of course never happen, but hey, this is SW... In order to avoid the above disaster, let's track which collections have been actually initialized, and let's not generate a SYNC if the collection hasn't been properly bound to a redistributor. Take this opportunity to spit our a warning, in the hope that someone may report the issue if it arrises again. Reported-by: Yang Yingliang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-6-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cae53937feeb..fcfc96f8e0de 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -182,6 +182,14 @@ static struct its_collection *dev_event_to_col(struct its_device *its_dev, return its->collections + its_dev->event_map.col_map[event]; } +static struct its_collection *valid_col(struct its_collection *col) +{ + if (WARN_ON_ONCE(col->target_address & GENMASK_ULL(0, 15))) + return NULL; + + return col; +} + /* * ITS command descriptors - parameters to be encoded in a command * block. @@ -439,7 +447,7 @@ static struct its_collection *its_build_mapti_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_movi_cmd(struct its_node *its, @@ -458,7 +466,7 @@ static struct its_collection *its_build_movi_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_discard_cmd(struct its_node *its, @@ -476,7 +484,7 @@ static struct its_collection *its_build_discard_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_inv_cmd(struct its_node *its, @@ -494,7 +502,7 @@ static struct its_collection *its_build_inv_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_int_cmd(struct its_node *its, @@ -512,7 +520,7 @@ static struct its_collection *its_build_int_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_clear_cmd(struct its_node *its, @@ -530,7 +538,7 @@ static struct its_collection *its_build_clear_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_invall_cmd(struct its_node *its, @@ -1824,11 +1832,16 @@ static int its_alloc_tables(struct its_node *its) static int its_alloc_collections(struct its_node *its) { + int i; + its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections), GFP_KERNEL); if (!its->collections) return -ENOMEM; + for (i = 0; i < nr_cpu_ids; i++) + its->collections[i].target_address = ~0ULL; + return 0; } -- cgit v1.2.3 From 205e065d91d72e6afad112ea84f0ca60b30bf5ab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:53 +0100 Subject: irqchip/gic-v3-its: Only emit VSYNC if targetting a valid collection Similarily to the SYNC operation, it must be verified that the VPE targetted by a VLPI is backed by a valid collection in the GIC driver data structures. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-7-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fcfc96f8e0de..0269ffb93f6e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -190,6 +190,14 @@ static struct its_collection *valid_col(struct its_collection *col) return col; } +static struct its_vpe *valid_vpe(struct its_node *its, struct its_vpe *vpe) +{ + if (valid_col(its->collections + vpe->col_idx)) + return vpe; + + return NULL; +} + /* * ITS command descriptors - parameters to be encoded in a command * block. @@ -562,7 +570,7 @@ static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vinvall_cmd.vpe; + return valid_vpe(its, desc->its_vinvall_cmd.vpe); } static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, @@ -584,7 +592,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmapp_cmd.vpe; + return valid_vpe(its, desc->its_vmapp_cmd.vpe); } static struct its_vpe *its_build_vmapti_cmd(struct its_node *its, @@ -607,7 +615,7 @@ static struct its_vpe *its_build_vmapti_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmapti_cmd.vpe; + return valid_vpe(its, desc->its_vmapti_cmd.vpe); } static struct its_vpe *its_build_vmovi_cmd(struct its_node *its, @@ -630,7 +638,7 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmovi_cmd.vpe; + return valid_vpe(its, desc->its_vmovi_cmd.vpe); } static struct its_vpe *its_build_vmovp_cmd(struct its_node *its, @@ -648,7 +656,7 @@ static struct its_vpe *its_build_vmovp_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmovp_cmd.vpe; + return valid_vpe(its, desc->its_vmovp_cmd.vpe); } static u64 its_cmd_ptr_to_offset(struct its_node *its, -- cgit v1.2.3 From 82f499c8811149069ec958b72a86643a7a289b25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:54 +0100 Subject: irqchip/gic-v3-its: Fix reprogramming of redistributors on CPU hotplug Enabling LPIs was made a lot stricter recently, by checking that they are disabled before enabling them. By doing so, the CPU hotplug case was missed altogether, which leaves LPIs enabled on hotplug off (expecting the CPU to eventually come back), and won't write a different value anyway on hotplug on. So skip that check if that particular case is detected Fixes: 6eb486b66a30 ("irqchip/gic-v3: Ensure GICR_CTLR.EnableLPI=0 is observed before enabling") Reported-by: Sumit Garg Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Sumit Garg Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Link: https://lkml.kernel.org/r/20180622095254.5906-8-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0269ffb93f6e..d7842d312d3e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3427,6 +3427,16 @@ static int redist_disable_lpis(void) u64 timeout = USEC_PER_SEC; u64 val; + /* + * If coming via a CPU hotplug event, we don't need to disable + * LPIs before trying to re-enable them. They are already + * configured and all is well in the world. Detect this case + * by checking the allocation of the pending table for the + * current CPU. + */ + if (gic_data_rdist()->pend_page) + return 0; + if (!gic_rdists_supports_plpis()) { pr_info("CPU%d: LPIs not supported\n", smp_processor_id()); return -ENXIO; -- cgit v1.2.3 From 880cb3cddd164128522146d1c087fbd1811ed0cc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 May 2018 16:14:15 +0100 Subject: irqchip/gic-v3-its: Refactor LPI allocator Our current LPI allocator relies on a bitmap, each bit representing a chunk of 32 LPIs, meaning that each device gets allocated LPIs in multiple of 32. It served us well so far, but new use cases now require much more finer grain allocations, down the the individual LPI. Given the size of the IntID space (up to 32bit), it isn't practical to continue using a bitmap, so let's use a different data structure altogether. We switch to a list, where each element represent a contiguous range of LPIs. On allocation, we simply grab the first group big enough to satisfy the allocation, and substract what we need from it. If the group becomes empty, we just remove it. On freeing interrupts, we insert a new group of interrupt in the list, sort it and fuse the adjacent groups. This makes freeing interrupt much more expensive than allocating them (an unusual behaviour), but that's fine as long as we consider that freeing interrupts is an extremely rare event. We still allocate interrupts in blocks of 32 for the time being, but subsequent patches will relax this. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 191 ++++++++++++++++++++++++++------------- 1 file changed, 129 insertions(+), 62 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d7842d312d3e..9084a7e5a4b2 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include #include @@ -1421,112 +1423,177 @@ static struct irq_chip its_irq_chip = { .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, }; + /* * How we allocate LPIs: * - * The GIC has id_bits bits for interrupt identifiers. From there, we - * must subtract 8192 which are reserved for SGIs/PPIs/SPIs. Then, as - * we allocate LPIs by chunks of 32, we can shift the whole thing by 5 - * bits to the right. + * lpi_range_list contains ranges of LPIs that are to available to + * allocate from. To allocate LPIs, just pick the first range that + * fits the required allocation, and reduce it by the required + * amount. Once empty, remove the range from the list. + * + * To free a range of LPIs, add a free range to the list, sort it and + * merge the result if the new range happens to be adjacent to an + * already free block. * - * This gives us (((1UL << id_bits) - 8192) >> 5) possible allocations. + * The consequence of the above is that allocation is cost is low, but + * freeing is expensive. We assumes that freeing rarely occurs. + */ + +/* + * Compatibility defines until we fully refactor the allocator */ #define IRQS_PER_CHUNK_SHIFT 5 #define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) #define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ -static unsigned long *lpi_bitmap; -static u32 lpi_chunks; -static DEFINE_SPINLOCK(lpi_lock); +static DEFINE_MUTEX(lpi_range_lock); +static LIST_HEAD(lpi_range_list); -static int its_lpi_to_chunk(int lpi) +struct lpi_range { + struct list_head entry; + u32 base_id; + u32 span; +}; + +static struct lpi_range *mk_lpi_range(u32 base, u32 span) { - return (lpi - 8192) >> IRQS_PER_CHUNK_SHIFT; + struct lpi_range *range; + + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (range) { + INIT_LIST_HEAD(&range->entry); + range->base_id = base; + range->span = span; + } + + return range; } -static int its_chunk_to_lpi(int chunk) +static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b) { - return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192; + struct lpi_range *ra, *rb; + + ra = container_of(a, struct lpi_range, entry); + rb = container_of(b, struct lpi_range, entry); + + return rb->base_id - ra->base_id; } -static int __init its_lpi_init(u32 id_bits) +static void merge_lpi_ranges(void) { - lpi_chunks = its_lpi_to_chunk(1UL << id_bits); + struct lpi_range *range, *tmp; - lpi_bitmap = kcalloc(BITS_TO_LONGS(lpi_chunks), sizeof(long), - GFP_KERNEL); - if (!lpi_bitmap) { - lpi_chunks = 0; - return -ENOMEM; + list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) { + if (!list_is_last(&range->entry, &lpi_range_list) && + (tmp->base_id == (range->base_id + range->span))) { + tmp->base_id = range->base_id; + tmp->span += range->span; + list_del(&range->entry); + kfree(range); + } } +} - pr_info("ITS: Allocated %d chunks for LPIs\n", (int)lpi_chunks); - return 0; +static int alloc_lpi_range(u32 nr_lpis, u32 *base) +{ + struct lpi_range *range, *tmp; + int err = -ENOSPC; + + mutex_lock(&lpi_range_lock); + + list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) { + if (range->span >= nr_lpis) { + *base = range->base_id; + range->base_id += nr_lpis; + range->span -= nr_lpis; + + if (range->span == 0) { + list_del(&range->entry); + kfree(range); + } + + err = 0; + break; + } + } + + mutex_unlock(&lpi_range_lock); + + pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis); + return err; } -static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) +static int free_lpi_range(u32 base, u32 nr_lpis) { - unsigned long *bitmap = NULL; - int chunk_id; - int nr_chunks; - int i; + struct lpi_range *new; + int err = 0; - nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK); + mutex_lock(&lpi_range_lock); - spin_lock(&lpi_lock); + new = mk_lpi_range(base, nr_lpis); + if (!new) { + err = -ENOMEM; + goto out; + } + + list_add(&new->entry, &lpi_range_list); + list_sort(NULL, &lpi_range_list, lpi_range_cmp); + merge_lpi_ranges(); +out: + mutex_unlock(&lpi_range_lock); + return err; +} + +static int __init its_lpi_init(u32 id_bits) +{ + u32 lpis = (1UL << id_bits) - 8192; + int err; + + /* + * Initializing the allocator is just the same as freeing the + * full range of LPIs. + */ + err = free_lpi_range(8192, lpis); + pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis); + return err; +} + +static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids) +{ + unsigned long *bitmap = NULL; + int err = 0; + int nr_lpis; + + nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK); do { - chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks, - 0, nr_chunks, 0); - if (chunk_id < lpi_chunks) + err = alloc_lpi_range(nr_lpis, base); + if (!err) break; - nr_chunks--; - } while (nr_chunks > 0); + nr_lpis -= IRQS_PER_CHUNK; + } while (nr_lpis > 0); - if (!nr_chunks) + if (err) goto out; - bitmap = kcalloc(BITS_TO_LONGS(nr_chunks * IRQS_PER_CHUNK), - sizeof(long), - GFP_ATOMIC); + bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC); if (!bitmap) goto out; - for (i = 0; i < nr_chunks; i++) - set_bit(chunk_id + i, lpi_bitmap); - - *base = its_chunk_to_lpi(chunk_id); - *nr_ids = nr_chunks * IRQS_PER_CHUNK; + *nr_ids = nr_lpis; out: - spin_unlock(&lpi_lock); - if (!bitmap) *base = *nr_ids = 0; return bitmap; } -static void its_lpi_free_chunks(unsigned long *bitmap, int base, int nr_ids) +static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids) { - int lpi; - - spin_lock(&lpi_lock); - - for (lpi = base; lpi < (base + nr_ids); lpi += IRQS_PER_CHUNK) { - int chunk = its_lpi_to_chunk(lpi); - - BUG_ON(chunk > lpi_chunks); - if (test_bit(chunk, lpi_bitmap)) { - clear_bit(chunk, lpi_bitmap); - } else { - pr_err("Bad LPI chunk %d\n", chunk); - } - } - - spin_unlock(&lpi_lock); - + WARN_ON(free_lpi_range(base, nr_ids)); kfree(bitmap); } -- cgit v1.2.3 From fe8e93504ce870cb60b5dca97d13d861a53d7353 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 May 2018 16:15:22 +0100 Subject: irqchip/gic-v3-its: Use full range of LPIs As we used to represent the LPI range using a bitmap, we were reducing the number of LPIs to at most 64k in order to preserve memory. With our new allocator, there is no such need, as dealing with 2^16 or 2^32 LPIs takes the same amount of memory. So let's use the number of IntID bits reported by the GIC instead of an arbitrary limit. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9084a7e5a4b2..20244006cd82 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1445,7 +1445,6 @@ static struct irq_chip its_irq_chip = { */ #define IRQS_PER_CHUNK_SHIFT 5 #define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) -#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ static DEFINE_MUTEX(lpi_range_lock); static LIST_HEAD(lpi_range_list); @@ -1626,7 +1625,7 @@ static int __init its_alloc_lpi_tables(void) { phys_addr_t paddr; - lpi_id_bits = min_t(u32, gic_rdists->id_bits, ITS_MAX_LPI_NRBITS); + lpi_id_bits = gic_rdists->id_bits; gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT); if (!gic_rdists->prop_page) { pr_err("Failed to allocate PROPBASE\n"); -- cgit v1.2.3 From 147c8f376e5526ebfaf2827734916414db52a3c4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 May 2018 16:39:55 +0100 Subject: irqchip/gic-v3-its: Move minimum LPI requirements to individual busses At the moment, the core ITS driver imposes the allocation to be in chunks of 32. As we want to relax this on a per bus basis, let's move the the the allocation constraints to each bus. No functionnal change. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c | 3 +++ drivers/irqchip/irq-gic-v3-its-pci-msi.c | 6 ++++-- drivers/irqchip/irq-gic-v3-its-platform-msi.c | 2 ++ drivers/irqchip/irq-gic-v3-its.c | 11 +++++++---- 4 files changed, 16 insertions(+), 6 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c index 4eca5c763766..606efa64adff 100644 --- a/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c @@ -45,6 +45,9 @@ static int its_fsl_mc_msi_prepare(struct irq_domain *msi_domain, */ info->scratchpad[0].ul = mc_bus_dev->icid; msi_info = msi_get_domain_info(msi_domain->parent); + + /* Allocate at least 32 MSIs, and always as a power of 2 */ + nvec = max_t(int, 32, roundup_pow_of_two(nvec)); return msi_info->ops->msi_prepare(msi_domain->parent, dev, nvec, info); } diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index 25a98de5cfb2..75c3cafabc6a 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -86,8 +86,10 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, /* ITS specific DeviceID, as the core ITS ignores dev. */ info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev); - return msi_info->ops->msi_prepare(domain->parent, - dev, max(nvec, alias_count), info); + /* Allocate at least 32 MSIs, and always as a power of 2 */ + nvec = max(nvec, alias_count); + nvec = max_t(int, 32, roundup_pow_of_two(nvec)); + return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); } static struct msi_domain_ops its_pci_msi_ops = { diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index 8881a053c173..7b8e87b493fe 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -73,6 +73,8 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, /* ITS specific DeviceID, as the core ITS ignores dev. */ info->scratchpad[0].ul = dev_id; + /* Allocate at least 32 MSIs, and always as a power of 2 */ + nvec = max_t(int, 32, roundup_pow_of_two(nvec)); return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 20244006cd82..03ac8d05279d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2200,12 +2200,15 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, if (!its_alloc_device_table(its, dev_id)) return NULL; + if (WARN_ON(!is_power_of_2(nvecs))) + nvecs = roundup_pow_of_two(nvecs); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); /* - * We allocate at least one chunk worth of LPIs bet device, - * and thus that many ITEs. The device may require less though. + * Even if the device wants a single LPI, the ITT must be + * sized as a power of two (and you need at least one bit...). */ - nr_ites = max(IRQS_PER_CHUNK, roundup_pow_of_two(nvecs)); + nr_ites = max(2, nvecs); sz = nr_ites * its->ite_size; sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); @@ -2861,7 +2864,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq BUG_ON(!vm); - bitmap = its_lpi_alloc_chunks(nr_irqs, &base, &nr_ids); + bitmap = its_lpi_alloc_chunks(roundup_pow_of_two(nr_irqs), &base, &nr_ids); if (!bitmap) return -ENOMEM; -- cgit v1.2.3 From 38dd7c494cf604879e187e9b56690d25f876cf69 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 May 2018 17:03:03 +0100 Subject: irqchip/gic-v3-its: Drop chunk allocation compatibility The chunk allocation system is now officially dead, so let's remove it. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 41 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 03ac8d05279d..9c5b85577053 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1440,12 +1440,6 @@ static struct irq_chip its_irq_chip = { * freeing is expensive. We assumes that freeing rarely occurs. */ -/* - * Compatibility defines until we fully refactor the allocator - */ -#define IRQS_PER_CHUNK_SHIFT 5 -#define IRQS_PER_CHUNK (1UL << IRQS_PER_CHUNK_SHIFT) - static DEFINE_MUTEX(lpi_range_lock); static LIST_HEAD(lpi_range_list); @@ -1558,30 +1552,27 @@ static int __init its_lpi_init(u32 id_bits) return err; } -static unsigned long *its_lpi_alloc_chunks(int nr_irqs, u32 *base, int *nr_ids) +static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) { unsigned long *bitmap = NULL; int err = 0; - int nr_lpis; - - nr_lpis = round_up(nr_irqs, IRQS_PER_CHUNK); do { - err = alloc_lpi_range(nr_lpis, base); + err = alloc_lpi_range(nr_irqs, base); if (!err) break; - nr_lpis -= IRQS_PER_CHUNK; - } while (nr_lpis > 0); + nr_irqs /= 2; + } while (nr_irqs > 0); if (err) goto out; - bitmap = kcalloc(BITS_TO_LONGS(nr_lpis), sizeof (long), GFP_ATOMIC); + bitmap = kcalloc(BITS_TO_LONGS(nr_irqs), sizeof (long), GFP_ATOMIC); if (!bitmap) goto out; - *nr_ids = nr_lpis; + *nr_ids = nr_irqs; out: if (!bitmap) @@ -1590,7 +1581,7 @@ out: return bitmap; } -static void its_lpi_free_chunks(unsigned long *bitmap, u32 base, u32 nr_ids) +static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids) { WARN_ON(free_lpi_range(base, nr_ids)); kfree(bitmap); @@ -2213,7 +2204,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; itt = kzalloc(sz, GFP_KERNEL); if (alloc_lpis) { - lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); + lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) col_map = kcalloc(nr_lpis, sizeof(*col_map), GFP_KERNEL); @@ -2448,9 +2439,9 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, /* If all interrupts have been freed, start mopping the floor */ if (bitmap_empty(its_dev->event_map.lpi_map, its_dev->event_map.nr_lpis)) { - its_lpi_free_chunks(its_dev->event_map.lpi_map, - its_dev->event_map.lpi_base, - its_dev->event_map.nr_lpis); + its_lpi_free(its_dev->event_map.lpi_map, + its_dev->event_map.lpi_base, + its_dev->event_map.nr_lpis); kfree(its_dev->event_map.col_map); /* Unmap device/itt */ @@ -2849,7 +2840,7 @@ static void its_vpe_irq_domain_free(struct irq_domain *domain, } if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { - its_lpi_free_chunks(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); + its_lpi_free(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); its_free_prop_table(vm->vprop_page); } } @@ -2864,18 +2855,18 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq BUG_ON(!vm); - bitmap = its_lpi_alloc_chunks(roundup_pow_of_two(nr_irqs), &base, &nr_ids); + bitmap = its_lpi_alloc(roundup_pow_of_two(nr_irqs), &base, &nr_ids); if (!bitmap) return -ENOMEM; if (nr_ids < nr_irqs) { - its_lpi_free_chunks(bitmap, base, nr_ids); + its_lpi_free(bitmap, base, nr_ids); return -ENOMEM; } vprop_page = its_allocate_prop_table(GFP_KERNEL); if (!vprop_page) { - its_lpi_free_chunks(bitmap, base, nr_ids); + its_lpi_free(bitmap, base, nr_ids); return -ENOMEM; } @@ -2902,7 +2893,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq if (i > 0) its_vpe_irq_domain_free(domain, virq, i - 1); - its_lpi_free_chunks(bitmap, base, nr_ids); + its_lpi_free(bitmap, base, nr_ids); its_free_prop_table(vprop_page); } -- cgit v1.2.3 From a4f9edb29d9c19f9f8dcd2df7ddfe4eb7ad58996 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 May 2018 17:29:52 +0100 Subject: irqchip/gic-v3: Expose GICD_TYPER in the rdist structure Instead of exposing the GIC distributor IntID field in the rdist structure that is passed to the ITS, let's replace it with a copy of the whole GICD_TYPER register. We are going to need some of this information at a later time. No functionnal change. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- drivers/irqchip/irq-gic-v3.c | 4 ++-- include/linux/irqchip/arm-gic-v3.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9c5b85577053..efe6d1a6c32e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1616,7 +1616,7 @@ static int __init its_alloc_lpi_tables(void) { phys_addr_t paddr; - lpi_id_bits = gic_rdists->id_bits; + lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer); gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT); if (!gic_rdists->prop_page) { pr_err("Failed to allocate PROPBASE\n"); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 76ea56d779a1..e214181b77b7 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -877,7 +877,7 @@ static struct irq_chip gic_eoimode1_chip = { .flags = IRQCHIP_SET_TYPE_MASKED, }; -#define GIC_ID_NR (1U << gic_data.rdists.id_bits) +#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) @@ -1091,7 +1091,7 @@ static int __init gic_init_bases(void __iomem *dist_base, * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) */ typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); - gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer); + gic_data.rdists.gicd_typer = typer; gic_irqs = GICD_TYPER_IRQS(typer); if (gic_irqs > 1020) gic_irqs = 1020; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index cbb872c1b607..396cd99af02f 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -576,8 +576,8 @@ struct rdists { phys_addr_t phys_base; } __percpu *rdist; struct page *prop_page; - int id_bits; u64 flags; + u32 gicd_typer; bool has_vlpis; bool has_direct_lpi; }; -- cgit v1.2.3 From 12b2905af183c931bedcab4292c81d3a415e080f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 May 2018 09:01:59 +0100 Subject: irqchip/gic-v3-its: Honor hypervisor enforced LPI range A recent extension to the GIC architecture allows a hypervisor to arbitrarily reduce the number of LPIs available to a guest, no matter what the GIC says about the valid range of IntIDs. Let's factor in this information when computing the number of available LPIs Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 9 +++++++++ include/linux/irqchip/arm-gic-v3.h | 1 + 2 files changed, 10 insertions(+) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index efe6d1a6c32e..f56c84977241 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1541,8 +1541,17 @@ out: static int __init its_lpi_init(u32 id_bits) { u32 lpis = (1UL << id_bits) - 8192; + u32 numlpis; int err; + numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer); + + if (numlpis > 2 && !WARN_ON(numlpis > lpis)) { + lpis = numlpis; + pr_info("ITS: Using hypervisor restricted LPI range [%u]\n", + lpis); + } + /* * Initializing the allocator is just the same as freeing the * full range of LPIs. diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 396cd99af02f..9d2ea3e907d0 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -73,6 +73,7 @@ #define GICD_TYPER_MBIS (1U << 16) #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) #define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) #define GICD_IROUTER_SPI_MODE_ONE (0U << 31) -- cgit v1.2.3 From 30800b3a1fb14c6f143db5616a4aaea9fef94af3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 May 2018 16:21:43 +0100 Subject: irqchip/gic-v3-its: Reduce minimum LPI allocation to 1 for PCI devices Allocating a minimum of 32 LPIs per PCI device, let's reduce it to be just 1, as most devices do not need that many interrupts. We still have to special-case DevID 0, as there is plenty of broken HW around where the PCI RID is not presented as a DevID to the ITS, and all the devices are presented as DevID 0. In this case, we keep the 32 minimal allocation. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index 75c3cafabc6a..8d6d009d1d58 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -66,7 +66,7 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, { struct pci_dev *pdev, *alias_dev; struct msi_domain_info *msi_info; - int alias_count = 0; + int alias_count = 0, minnvec = 1; if (!dev_is_pci(dev)) return -EINVAL; @@ -86,9 +86,17 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, /* ITS specific DeviceID, as the core ITS ignores dev. */ info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev); - /* Allocate at least 32 MSIs, and always as a power of 2 */ + /* + * Always allocate a power of 2, and special case device 0 for + * broken systems where the DevID is not wired (and all devices + * appear as DevID 0). For that reason, we generously allocate a + * minimum of 32 MSIs for DevID 0. If you want more because all + * your devices are aliasing to DevID 0, consider fixing your HW. + */ nvec = max(nvec, alias_count); - nvec = max_t(int, 32, roundup_pow_of_two(nvec)); + if (!info->scratchpad[0].ul) + minnvec = 32; + nvec = max_t(int, minnvec, roundup_pow_of_two(nvec)); return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); } -- cgit v1.2.3 From a8db74564b0c634667e1722264bde303d296f566 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 18 Jul 2018 17:42:04 +0200 Subject: irqchip/gic-v3-its: Make its_lock a raw_spin_lock_t The its_lock lock is held while a new device is added to the list and during setup while the CPU is booted. Even on -RT the CPU-bootup is performed with disabled interrupts. Make its_lock a raw_spin_lock_t. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/irqchip') diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index f56c84977241..316a57530f6d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -162,7 +162,7 @@ static struct { } vpe_proxy; static LIST_HEAD(its_nodes); -static DEFINE_SPINLOCK(its_lock); +static DEFINE_RAW_SPINLOCK(its_lock); static struct rdists *gic_rdists; static struct irq_domain *its_parent; @@ -2063,12 +2063,12 @@ static void its_cpu_init_collections(void) { struct its_node *its; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_for_each_entry(its, &its_nodes, entry) its_cpu_init_collection(its); - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); } static struct its_device *its_find_device(struct its_node *its, u32 dev_id) @@ -3139,7 +3139,7 @@ static int its_save_disable(void) struct its_node *its; int err = 0; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_for_each_entry(its, &its_nodes, entry) { void __iomem *base; @@ -3171,7 +3171,7 @@ err: writel_relaxed(its->ctlr_save, base + GITS_CTLR); } } - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); return err; } @@ -3181,7 +3181,7 @@ static void its_restore_enable(void) struct its_node *its; int ret; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_for_each_entry(its, &its_nodes, entry) { void __iomem *base; int i; @@ -3233,7 +3233,7 @@ static void its_restore_enable(void) GITS_TYPER_HCC(gic_read_typer(base + GITS_TYPER))) its_cpu_init_collection(its); } - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); } static struct syscore_ops its_syscore_ops = { @@ -3467,9 +3467,9 @@ static int __init its_probe_one(struct resource *res, if (err) goto out_free_tables; - spin_lock(&its_lock); + raw_spin_lock(&its_lock); list_add(&its->entry, &its_nodes); - spin_unlock(&its_lock); + raw_spin_unlock(&its_lock); return 0; -- cgit v1.2.3