From 347253c42d7c673aa2a659d756bc7ff893459247 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 25 Jan 2019 17:53:43 +0800 Subject: genirq/affinity: Move allocation of 'node_to_cpumask' to irq_build_affinity_masks() 'node_to_cpumask' is just one temparay variable for irq_build_affinity_masks(), so move it into irq_build_affinity_masks(). No functioanl change. Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Reviewed-by: Bjorn Helgaas Cc: Christoph Hellwig Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Link: https://lkml.kernel.org/r/20190125095347.17950-2-ming.lei@redhat.com --- kernel/irq/affinity.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'kernel/irq/affinity.c') diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 45b68b4ea48b..118b66d64a53 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -175,18 +175,22 @@ out: */ static int irq_build_affinity_masks(const struct irq_affinity *affd, int startvec, int numvecs, int firstvec, - cpumask_var_t *node_to_cpumask, struct irq_affinity_desc *masks) { int curvec = startvec, nr_present, nr_others; int ret = -ENOMEM; cpumask_var_t nmsk, npresmsk; + cpumask_var_t *node_to_cpumask; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return ret; if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) - goto fail; + goto fail_nmsk; + + node_to_cpumask = alloc_node_to_cpumask(); + if (!node_to_cpumask) + goto fail_npresmsk; ret = 0; /* Stabilize the cpumasks */ @@ -217,9 +221,12 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, if (nr_present < numvecs) WARN_ON(nr_present + nr_others < numvecs); + free_node_to_cpumask(node_to_cpumask); + + fail_npresmsk: free_cpumask_var(npresmsk); - fail: + fail_nmsk: free_cpumask_var(nmsk); return ret; } @@ -236,7 +243,6 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) { int affvecs = nvecs - affd->pre_vectors - affd->post_vectors; int curvec, usedvecs; - cpumask_var_t *node_to_cpumask; struct irq_affinity_desc *masks = NULL; int i, nr_sets; @@ -247,13 +253,9 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) if (nvecs == affd->pre_vectors + affd->post_vectors) return NULL; - node_to_cpumask = alloc_node_to_cpumask(); - if (!node_to_cpumask) - return NULL; - masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) - goto outnodemsk; + return NULL; /* Fill out vectors at the beginning that don't need affinity */ for (curvec = 0; curvec < affd->pre_vectors; curvec++) @@ -271,11 +273,10 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, - curvec, node_to_cpumask, masks); + curvec, masks); if (ret) { kfree(masks); - masks = NULL; - goto outnodemsk; + return NULL; } curvec += this_vecs; usedvecs += this_vecs; @@ -293,8 +294,6 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++) masks[i].is_managed = 1; -outnodemsk: - free_node_to_cpumask(node_to_cpumask); return masks; } -- cgit v1.2.3 From 0145c30e896d26e638d27c957d9eed72893c1c92 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Feb 2019 18:13:07 +0100 Subject: genirq/affinity: Code consolidation All information and calculations in the interrupt affinity spreading code is strictly unsigned int. Though the code uses int all over the place. Convert it over to unsigned int. Signed-off-by: Thomas Gleixner Reviewed-by: Ming Lei Acked-by: Marc Zyngier Cc: Christoph Hellwig Cc: Bjorn Helgaas Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: Keith Busch Cc: Sumit Saxena Cc: Kashyap Desai Cc: Shivasharan Srikanteshwara Link: https://lkml.kernel.org/r/20190216172228.336424556@linutronix.de --- include/linux/interrupt.h | 20 +++++++++-------- kernel/irq/affinity.c | 56 +++++++++++++++++++++++------------------------ 2 files changed, 38 insertions(+), 38 deletions(-) (limited to 'kernel/irq/affinity.c') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 4a728dba02e2..35e7389c2011 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -251,10 +251,10 @@ struct irq_affinity_notify { * @sets: Number of affinitized sets */ struct irq_affinity { - int pre_vectors; - int post_vectors; - int nr_sets; - int *sets; + unsigned int pre_vectors; + unsigned int post_vectors; + unsigned int nr_sets; + unsigned int *sets; }; /** @@ -314,9 +314,10 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * -irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); +irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd); -int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd); +unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd); #else /* CONFIG_SMP */ @@ -350,13 +351,14 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) } static inline struct irq_affinity_desc * -irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd) { return NULL; } -static inline int -irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) +static inline unsigned int +irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd) { return maxvec; } diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 118b66d64a53..82e8799374e9 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -9,7 +9,7 @@ #include static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, - int cpus_per_vec) + unsigned int cpus_per_vec) { const struct cpumask *siblmsk; int cpu, sibl; @@ -95,15 +95,17 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, } static int __irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, int firstvec, + unsigned int startvec, + unsigned int numvecs, + unsigned int firstvec, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct irq_affinity_desc *masks) { - int n, nodes, cpus_per_vec, extra_vecs, done = 0; - int last_affv = firstvec + numvecs; - int curvec = startvec; + unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0; + unsigned int last_affv = firstvec + numvecs; + unsigned int curvec = startvec; nodemask_t nodemsk = NODE_MASK_NONE; if (!cpumask_weight(cpu_mask)) @@ -117,18 +119,16 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, - &masks[curvec].mask, - node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, + node_to_cpumask[n]); if (++curvec == last_affv) curvec = firstvec; } - done = numvecs; - goto out; + return numvecs; } for_each_node_mask(n, nodemsk) { - int ncpus, v, vecs_to_assign, vecs_per_node; + unsigned int ncpus, v, vecs_to_assign, vecs_per_node; /* Spread the vectors per node */ vecs_per_node = (numvecs - (curvec - firstvec)) / nodes; @@ -163,8 +163,6 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd, curvec = firstvec; --nodes; } - -out: return done; } @@ -174,13 +172,14 @@ out: * 2) spread other possible CPUs on these vectors */ static int irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, int firstvec, + unsigned int startvec, unsigned int numvecs, + unsigned int firstvec, struct irq_affinity_desc *masks) { - int curvec = startvec, nr_present, nr_others; - int ret = -ENOMEM; - cpumask_var_t nmsk, npresmsk; + unsigned int curvec = startvec, nr_present, nr_others; cpumask_var_t *node_to_cpumask; + cpumask_var_t nmsk, npresmsk; + int ret = -ENOMEM; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return ret; @@ -239,12 +238,10 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, * Returns the irq_affinity_desc pointer or NULL if allocation failed. */ struct irq_affinity_desc * -irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) { - int affvecs = nvecs - affd->pre_vectors - affd->post_vectors; - int curvec, usedvecs; + unsigned int affvecs, curvec, usedvecs, nr_sets, i; struct irq_affinity_desc *masks = NULL; - int i, nr_sets; /* * If there aren't any vectors left after applying the pre/post @@ -264,16 +261,17 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) * Spread on present CPUs starting from affd->pre_vectors. If we * have multiple sets, build each sets affinity mask separately. */ + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; nr_sets = affd->nr_sets; if (!nr_sets) nr_sets = 1; for (i = 0, usedvecs = 0; i < nr_sets; i++) { - int this_vecs = affd->sets ? affd->sets[i] : affvecs; + unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, - curvec, masks); + curvec, masks); if (ret) { kfree(masks); return NULL; @@ -303,17 +301,17 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) * @maxvec: The maximum number of vectors available * @affd: Description of the affinity requirements */ -int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) +unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd) { - int resv = affd->pre_vectors + affd->post_vectors; - int vecs = maxvec - resv; - int set_vecs; + unsigned int resv = affd->pre_vectors + affd->post_vectors; + unsigned int set_vecs; if (resv > minvec) return 0; if (affd->nr_sets) { - int i; + unsigned int i; for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) set_vecs += affd->sets[i]; @@ -323,5 +321,5 @@ int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity put_online_cpus(); } - return resv + min(set_vecs, vecs); + return resv + min(set_vecs, maxvec - resv); } -- cgit v1.2.3 From 9cfef55bb57e7620c63087be18a76351628f8d0f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 16 Feb 2019 18:13:08 +0100 Subject: genirq/affinity: Store interrupt sets size in struct irq_affinity The interrupt affinity spreading mechanism supports to spread out affinities for one or more interrupt sets. A interrupt set contains one or more interrupts. Each set is mapped to a specific functionality of a device, e.g. general I/O queues and read I/O queus of multiqueue block devices. The number of interrupts per set is defined by the driver. It depends on the total number of available interrupts for the device, which is determined by the PCI capabilites and the availability of underlying CPU resources, and the number of queues which the device provides and the driver wants to instantiate. The driver passes initial configuration for the interrupt allocation via a pointer to struct irq_affinity. Right now the allocation mechanism is complex as it requires to have a loop in the driver to determine the maximum number of interrupts which are provided by the PCI capabilities and the underlying CPU resources. This loop would have to be replicated in every driver which wants to utilize this mechanism. That's unwanted code duplication and error prone. In order to move this into generic facilities it is required to have a mechanism, which allows the recalculation of the interrupt sets and their size, in the core code. As the core code does not have any knowledge about the underlying device, a driver specific callback will be added to struct affinity_desc, which will be invoked by the core code. The callback will get the number of available interupts as an argument, so the driver can calculate the corresponding number and size of interrupt sets. To support this, two modifications for the handling of struct irq_affinity are required: 1) The (optional) interrupt sets size information is contained in a separate array of integers and struct irq_affinity contains a pointer to it. This is cumbersome and as the maximum number of interrupt sets is small, there is no reason to have separate storage. Moving the size array into struct affinity_desc avoids indirections and makes the code simpler. 2) At the moment the struct irq_affinity pointer which is handed in from the driver and passed through to several core functions is marked 'const'. With the upcoming callback to recalculate the number and size of interrupt sets, it's necessary to remove the 'const' qualifier. Otherwise the callback would not be able to update the data. Implement #1 and store the interrupt sets size in 'struct irq_affinity'. No functional change. [ tglx: Fixed the memcpy() size so it won't copy beyond the size of the source. Fixed the kernel doc comments for struct irq_affinity and de-'This patch'-ed the changelog ] Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Christoph Hellwig Cc: Bjorn Helgaas Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: Keith Busch Cc: Sumit Saxena Cc: Kashyap Desai Cc: Shivasharan Srikanteshwara Link: https://lkml.kernel.org/r/20190216172228.423723127@linutronix.de --- drivers/nvme/host/pci.c | 7 +++---- include/linux/interrupt.h | 9 ++++++--- kernel/irq/affinity.c | 16 ++++++++++++---- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'kernel/irq/affinity.c') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9bc585415d9b..21ffd671b6ed 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2081,12 +2081,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) { struct pci_dev *pdev = to_pci_dev(dev->dev); - int irq_sets[2]; struct irq_affinity affd = { - .pre_vectors = 1, - .nr_sets = ARRAY_SIZE(irq_sets), - .sets = irq_sets, + .pre_vectors = 1, + .nr_sets = 2, }; + unsigned int *irq_sets = affd.set_size; int result = 0; unsigned int irq_queues, this_p_queues; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7389c2011..5afdfd5dc39b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -241,20 +241,23 @@ struct irq_affinity_notify { void (*release)(struct kref *ref); }; +#define IRQ_AFFINITY_MAX_SETS 4 + /** * struct irq_affinity - Description for automatic irq affinity assignements * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of * the MSI(-X) vector space - * @nr_sets: Length of passed in *sets array - * @sets: Number of affinitized sets + * @nr_sets: The number of interrupt sets for which affinity + * spreading is required + * @set_size: Array holding the size of each interrupt set */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; - unsigned int *sets; + unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; }; /** diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 82e8799374e9..278289c091bb 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -238,9 +238,10 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, * Returns the irq_affinity_desc pointer or NULL if allocation failed. */ struct irq_affinity_desc * -irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) { unsigned int affvecs, curvec, usedvecs, nr_sets, i; + unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; struct irq_affinity_desc *masks = NULL; /* @@ -250,6 +251,9 @@ irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) if (nvecs == affd->pre_vectors + affd->post_vectors) return NULL; + if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) + return NULL; + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; @@ -263,11 +267,15 @@ irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) */ affvecs = nvecs - affd->pre_vectors - affd->post_vectors; nr_sets = affd->nr_sets; - if (!nr_sets) + if (!nr_sets) { nr_sets = 1; + set_size[0] = affvecs; + } else { + memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int)); + } for (i = 0, usedvecs = 0; i < nr_sets; i++) { - unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs; + unsigned int this_vecs = set_size[i]; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, @@ -314,7 +322,7 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, unsigned int i; for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) - set_vecs += affd->sets[i]; + set_vecs += affd->set_size[i]; } else { get_online_cpus(); set_vecs = cpumask_weight(cpu_possible_mask); -- cgit v1.2.3 From c66d4bd110a1f8a68c1a88bfbf866eb50c6464b7 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 16 Feb 2019 18:13:09 +0100 Subject: genirq/affinity: Add new callback for (re)calculating interrupt sets The interrupt affinity spreading mechanism supports to spread out affinities for one or more interrupt sets. A interrupt set contains one or more interrupts. Each set is mapped to a specific functionality of a device, e.g. general I/O queues and read I/O queus of multiqueue block devices. The number of interrupts per set is defined by the driver. It depends on the total number of available interrupts for the device, which is determined by the PCI capabilites and the availability of underlying CPU resources, and the number of queues which the device provides and the driver wants to instantiate. The driver passes initial configuration for the interrupt allocation via a pointer to struct irq_affinity. Right now the allocation mechanism is complex as it requires to have a loop in the driver to determine the maximum number of interrupts which are provided by the PCI capabilities and the underlying CPU resources. This loop would have to be replicated in every driver which wants to utilize this mechanism. That's unwanted code duplication and error prone. In order to move this into generic facilities it is required to have a mechanism, which allows the recalculation of the interrupt sets and their size, in the core code. As the core code does not have any knowledge about the underlying device, a driver specific callback is required in struct irq_affinity, which can be invoked by the core code. The callback gets the number of available interupts as an argument, so the driver can calculate the corresponding number and size of interrupt sets. At the moment the struct irq_affinity pointer which is handed in from the driver and passed through to several core functions is marked 'const', but for the callback to be able to modify the data in the struct it's required to remove the 'const' qualifier. Add the optional callback to struct irq_affinity, which allows drivers to recalculate the number and size of interrupt sets and remove the 'const' qualifier. For simple invocations, which do not supply a callback, a default callback is installed, which just sets nr_sets to 1 and transfers the number of spreadable vectors to the set_size array at index 0. This is for now guarded by a check for nr_sets != 0 to keep the NVME driver working until it is converted to the callback mechanism. To make sure that the driver configuration is correct under all circumstances the callback is invoked even when there are no interrupts for queues left, i.e. the pre/post requirements already exhaust the numner of available interrupts. At the PCI layer irq_create_affinity_masks() has to be invoked even for the case where the legacy interrupt is used. That ensures that the callback is invoked and the device driver can adjust to that situation. [ tglx: Fixed the simple case (no sets required). Moved the sanity check for nr_sets after the invocation of the callback so it catches broken drivers. Fixed the kernel doc comments for struct irq_affinity and de-'This patch'-ed the changelog ] Signed-off-by: Ming Lei Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: Christoph Hellwig Cc: Bjorn Helgaas Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: Keith Busch Cc: Sumit Saxena Cc: Kashyap Desai Cc: Shivasharan Srikanteshwara Link: https://lkml.kernel.org/r/20190216172228.512444498@linutronix.de --- drivers/pci/msi.c | 25 +++++++++++------ drivers/scsi/be2iscsi/be_main.c | 2 +- include/linux/interrupt.h | 10 +++++-- include/linux/pci.h | 4 +-- kernel/irq/affinity.c | 62 +++++++++++++++++++++++++++++------------ 5 files changed, 71 insertions(+), 32 deletions(-) (limited to 'kernel/irq/affinity.c') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 4c0b47867258..7149d6315726 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -532,7 +532,7 @@ error_attrs: } static struct msi_desc * -msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd) +msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) { struct irq_affinity_desc *masks = NULL; struct msi_desc *entry; @@ -597,7 +597,7 @@ static int msi_verify_entries(struct pci_dev *dev) * which could have been allocated. */ static int msi_capability_init(struct pci_dev *dev, int nvec, - const struct irq_affinity *affd) + struct irq_affinity *affd) { struct msi_desc *entry; int ret; @@ -669,7 +669,7 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct msix_entry *entries, int nvec, - const struct irq_affinity *affd) + struct irq_affinity *affd) { struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; @@ -736,7 +736,7 @@ static void msix_program_entries(struct pci_dev *dev, * requested MSI-X entries with allocated irqs or non-zero for otherwise. **/ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, - int nvec, const struct irq_affinity *affd) + int nvec, struct irq_affinity *affd) { int ret; u16 control; @@ -932,7 +932,7 @@ int pci_msix_vec_count(struct pci_dev *dev) EXPORT_SYMBOL(pci_msix_vec_count); static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int nvec, const struct irq_affinity *affd) + int nvec, struct irq_affinity *affd) { int nr_entries; int i, j; @@ -1018,7 +1018,7 @@ int pci_msi_enabled(void) EXPORT_SYMBOL(pci_msi_enabled); static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, - const struct irq_affinity *affd) + struct irq_affinity *affd) { int nvec; int rc; @@ -1086,7 +1086,7 @@ EXPORT_SYMBOL(pci_enable_msi); static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, - int maxvec, const struct irq_affinity *affd) + int maxvec, struct irq_affinity *affd) { int rc, nvec = maxvec; @@ -1165,9 +1165,9 @@ EXPORT_SYMBOL(pci_enable_msix_range); */ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, - const struct irq_affinity *affd) + struct irq_affinity *affd) { - static const struct irq_affinity msi_default_affd; + struct irq_affinity msi_default_affd = {0}; int msix_vecs = -ENOSPC; int msi_vecs = -ENOSPC; @@ -1196,6 +1196,13 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, /* use legacy irq if allowed */ if (flags & PCI_IRQ_LEGACY) { if (min_vecs == 1 && dev->irq) { + /* + * Invoke the affinity spreading logic to ensure that + * the device driver can adjust queue configuration + * for the single interrupt case. + */ + if (affd) + irq_create_affinity_masks(1, affd); pci_intx(dev, 1); return 1; } diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 74e260027c7d..76e49d902609 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -3566,7 +3566,7 @@ static void be2iscsi_enable_msix(struct beiscsi_hba *phba) /* if eqid_count == 1 fall back to INTX */ if (enable_msix && nvec > 1) { - const struct irq_affinity desc = { .post_vectors = 1 }; + struct irq_affinity desc = { .post_vectors = 1 }; if (pci_alloc_irq_vectors_affinity(phba->pcidev, 2, nvec, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc) < 0) { diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5afdfd5dc39b..dcdddf4fa76b 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -252,12 +252,18 @@ struct irq_affinity_notify { * @nr_sets: The number of interrupt sets for which affinity * spreading is required * @set_size: Array holding the size of each interrupt set + * @calc_sets: Callback for calculating the number and size + * of interrupt sets + * @priv: Private data for usage by @calc_sets, usually a + * pointer to driver/device specific data. */ struct irq_affinity { unsigned int pre_vectors; unsigned int post_vectors; unsigned int nr_sets; unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; + void (*calc_sets)(struct irq_affinity *, unsigned int nvecs); + void *priv; }; /** @@ -317,7 +323,7 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct irq_affinity_desc * -irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd); +irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd); unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, const struct irq_affinity *affd); @@ -354,7 +360,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) } static inline struct irq_affinity_desc * -irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd) { return NULL; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 65f1d8c2f082..e7c51b00cdfe 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1393,7 +1393,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, } int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, - const struct irq_affinity *affd); + struct irq_affinity *affd); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); @@ -1419,7 +1419,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, static inline int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, - const struct irq_affinity *aff_desc) + struct irq_affinity *aff_desc) { if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) return 1; diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 278289c091bb..d737dc60ab52 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -230,6 +230,12 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, return ret; } +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) +{ + affd->nr_sets = 1; + affd->set_size[0] = affvecs; +} + /** * irq_create_affinity_masks - Create affinity masks for multiqueue spreading * @nvecs: The total number of vectors @@ -240,20 +246,46 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, struct irq_affinity_desc * irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) { - unsigned int affvecs, curvec, usedvecs, nr_sets, i; - unsigned int set_size[IRQ_AFFINITY_MAX_SETS]; + unsigned int affvecs, curvec, usedvecs, i; struct irq_affinity_desc *masks = NULL; /* - * If there aren't any vectors left after applying the pre/post - * vectors don't bother with assigning affinity. + * Determine the number of vectors which need interrupt affinities + * assigned. If the pre/post request exhausts the available vectors + * then nothing to do here except for invoking the calc_sets() + * callback so the device driver can adjust to the situation. If there + * is only a single vector, then managing the queue is pointless as + * well. */ - if (nvecs == affd->pre_vectors + affd->post_vectors) - return NULL; + if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors) + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; + else + affvecs = 0; + + /* + * Simple invocations do not provide a calc_sets() callback. Install + * the generic one. The check for affd->nr_sets is a temporary + * workaround and will be removed after the NVME driver is converted + * over. + */ + if (!affd->nr_sets && !affd->calc_sets) + affd->calc_sets = default_calc_sets; + + /* + * If the device driver provided a calc_sets() callback let it + * recalculate the number of sets and their size. The check will go + * away once the NVME driver is converted over. + */ + if (affd->calc_sets) + affd->calc_sets(affd, affvecs); if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) return NULL; + /* Nothing to assign? */ + if (!affvecs) + return NULL; + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) return NULL; @@ -261,21 +293,13 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) /* Fill out vectors at the beginning that don't need affinity */ for (curvec = 0; curvec < affd->pre_vectors; curvec++) cpumask_copy(&masks[curvec].mask, irq_default_affinity); + /* * Spread on present CPUs starting from affd->pre_vectors. If we * have multiple sets, build each sets affinity mask separately. */ - affvecs = nvecs - affd->pre_vectors - affd->post_vectors; - nr_sets = affd->nr_sets; - if (!nr_sets) { - nr_sets = 1; - set_size[0] = affvecs; - } else { - memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int)); - } - - for (i = 0, usedvecs = 0; i < nr_sets; i++) { - unsigned int this_vecs = set_size[i]; + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { + unsigned int this_vecs = affd->set_size[i]; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, @@ -318,7 +342,9 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, if (resv > minvec) return 0; - if (affd->nr_sets) { + if (affd->calc_sets) { + set_vecs = maxvec - resv; + } else if (affd->nr_sets) { unsigned int i; for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) -- cgit v1.2.3 From a6a309edba13866b31dc4d8aebad3864a6d56ade Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Feb 2019 18:13:11 +0100 Subject: genirq/affinity: Remove the leftovers of the original set support Now that the NVME driver is converted over to the calc_set() callback, the workarounds of the original set support can be removed. Signed-off-by: Thomas Gleixner Reviewed-by: Ming Lei Acked-by: Marc Zyngier Cc: Christoph Hellwig Cc: Bjorn Helgaas Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: Keith Busch Cc: Sumit Saxena Cc: Kashyap Desai Cc: Shivasharan Srikanteshwara Link: https://lkml.kernel.org/r/20190216172228.689834224@linutronix.de --- kernel/irq/affinity.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'kernel/irq/affinity.c') diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index d737dc60ab52..f18cd5aa33e8 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -264,20 +264,13 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) /* * Simple invocations do not provide a calc_sets() callback. Install - * the generic one. The check for affd->nr_sets is a temporary - * workaround and will be removed after the NVME driver is converted - * over. + * the generic one. */ - if (!affd->nr_sets && !affd->calc_sets) + if (!affd->calc_sets) affd->calc_sets = default_calc_sets; - /* - * If the device driver provided a calc_sets() callback let it - * recalculate the number of sets and their size. The check will go - * away once the NVME driver is converted over. - */ - if (affd->calc_sets) - affd->calc_sets(affd, affvecs); + /* Recalculate the sets */ + affd->calc_sets(affd, affvecs); if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) return NULL; @@ -344,11 +337,6 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, if (affd->calc_sets) { set_vecs = maxvec - resv; - } else if (affd->nr_sets) { - unsigned int i; - - for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) - set_vecs += affd->set_size[i]; } else { get_online_cpus(); set_vecs = cpumask_weight(cpu_possible_mask); -- cgit v1.2.3