From f1ec7187167ce225d2744b20a90afef5f10fd6cd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 29 Apr 2015 16:02:30 -0500 Subject: libfdt: add fdt type definitions In preparation for libfdt/dtc update, add the new fdt specific types. Signed-off-by: Rob Herring Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/boot/libfdt_env.h | 4 ++++ arch/powerpc/boot/of.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index 8dcd744e5728..7e3789ea396b 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -10,6 +10,10 @@ typedef u32 uint32_t; typedef u64 uint64_t; typedef unsigned long uintptr_t; +typedef __be16 fdt16_t; +typedef __be32 fdt32_t; +typedef __be64 fdt64_t; + #define fdt16_to_cpu(x) be16_to_cpu(x) #define cpu_to_fdt16(x) cpu_to_be16(x) #define fdt32_to_cpu(x) be32_to_cpu(x) diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h index 5603320dce07..53f8f27f94e4 100644 --- a/arch/powerpc/boot/of.h +++ b/arch/powerpc/boot/of.h @@ -21,7 +21,9 @@ int of_setprop(const void *phandle, const char *name, const void *buf, /* Console functions */ void of_console_init(void); +typedef u16 __be16; typedef u32 __be32; +typedef u64 __be64; #ifdef __LITTLE_ENDIAN__ #define cpu_to_be16(x) swab16(x) -- cgit v1.2.3 From 84be456f883c4685680fba8e5154b5f72e92957e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 May 2015 12:46:15 +0200 Subject: remove We don't have any arch specific scatterlist now that parisc switched over to the generic one. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/alpha/include/asm/pci.h | 2 +- arch/arm/include/asm/dma.h | 2 +- arch/arm/mach-footbridge/dma.c | 2 +- arch/blackfin/include/asm/pci.h | 2 +- arch/cris/include/asm/dma-mapping.h | 2 +- arch/cris/include/asm/pci.h | 2 +- arch/frv/include/asm/dma-mapping.h | 2 +- arch/frv/include/asm/pci.h | 2 +- arch/ia64/include/asm/pci.h | 2 +- arch/microblaze/include/asm/pci.h | 2 +- arch/mips/include/asm/dma-mapping.h | 2 +- arch/mips/include/asm/pci.h | 2 +- arch/mn10300/include/asm/pci.h | 2 +- arch/parisc/include/asm/dma-mapping.h | 2 +- arch/parisc/include/asm/pci.h | 2 +- arch/powerpc/include/asm/pci.h | 2 +- arch/powerpc/include/asm/vio.h | 2 +- arch/sparc/kernel/iommu_common.h | 2 +- arch/x86/include/asm/pci.h | 2 +- arch/xtensa/include/asm/pci.h | 2 +- drivers/mmc/host/android-goldfish.c | 2 +- include/asm-generic/scatterlist.h | 34 ------------------------------ include/linux/blkdev.h | 3 +-- include/linux/dmapool.h | 2 +- include/linux/scatterlist.h | 39 ++++++++++++++++++++++++++++------- lib/swiotlb.c | 2 +- 26 files changed, 56 insertions(+), 66 deletions(-) delete mode 100644 include/asm-generic/scatterlist.h (limited to 'arch/powerpc') diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index f7f680f7457d..bf7d97dce9e8 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index 99084431d6ae..bb4fa67da541 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -19,7 +19,7 @@ * It should not be re-used except for that purpose. 
*/ #include -#include +#include #include diff --git a/arch/arm/mach-footbridge/dma.c b/arch/arm/mach-footbridge/dma.c index e2e0df8bcee2..22536b85a81d 100644 --- a/arch/arm/mach-footbridge/dma.c +++ b/arch/arm/mach-footbridge/dma.c @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h index c737909fba47..14efc0db1ade 100644 --- a/arch/blackfin/include/asm/pci.h +++ b/arch/blackfin/include/asm/pci.h @@ -3,7 +3,7 @@ #ifndef _ASM_BFIN_PCI_H #define _ASM_BFIN_PCI_H -#include +#include #include #include diff --git a/arch/cris/include/asm/dma-mapping.h b/arch/cris/include/asm/dma-mapping.h index 2f0f654f1b44..57f794ee6039 100644 --- a/arch/cris/include/asm/dma-mapping.h +++ b/arch/cris/include/asm/dma-mapping.h @@ -5,10 +5,10 @@ #include #include +#include #include #include -#include #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h index cc2399c175e9..c15b4b4baafa 100644 --- a/arch/cris/include/asm/pci.h +++ b/arch/cris/include/asm/pci.h @@ -29,7 +29,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); #include #include -#include +#include #include #include diff --git a/arch/frv/include/asm/dma-mapping.h b/arch/frv/include/asm/dma-mapping.h index 1746a2b8e6e7..2840adcd6d92 100644 --- a/arch/frv/include/asm/dma-mapping.h +++ b/arch/frv/include/asm/dma-mapping.h @@ -2,9 +2,9 @@ #define _ASM_DMA_MAPPING_H #include +#include #include #include -#include #include /* diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index 2035a4d3f9b9..43d224685144 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -14,7 +14,7 @@ #define _ASM_FRV_PCI_H #include -#include +#include #include #include diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 52af5ed9f60b..0c0e25d6427a 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -6,9 +6,9 @@ #include #include #include +#include #include -#include #include struct pci_vector_struct { diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 468aca8cec0d..81f27aef979c 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -16,8 +16,8 @@ #include #include #include +#include -#include #include #include #include diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index fd1b4a150759..360b3387182a 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -1,7 +1,7 @@ #ifndef _ASM_DMA_MAPPING_H #define _ASM_DMA_MAPPING_H -#include +#include #include #include #include diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index d9692993fc83..3413b10d833b 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -99,7 +99,7 @@ static inline void pci_resource_to_user(const struct pci_dev *dev, int bar, #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h index 5f70af25c7d0..393b51d38f36 100644 --- a/arch/mn10300/include/asm/pci.h +++ b/arch/mn10300/include/asm/pci.h @@ -55,7 +55,7 @@ void pcibios_set_master(struct pci_dev *dev); #include #include -#include +#include #include #include diff --git a/arch/parisc/include/asm/dma-mapping.h 
b/arch/parisc/include/asm/dma-mapping.h index d0eae5f2bd87..d8d60a57183f 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -2,8 +2,8 @@ #define _PARISC_DMA_MAPPING_H #include +#include #include -#include /* See Documentation/DMA-API-HOWTO.txt */ struct hppa_dma_ops { diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 20df2b04fc09..794d8820db5d 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -1,7 +1,7 @@ #ifndef __ASM_PARISC_PCI_H #define __ASM_PARISC_PCI_H -#include +#include diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 4aef8d660999..3be43ab63181 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h index 4f9b7ca0710f..84286ec77b12 100644 --- a/arch/powerpc/include/asm/vio.h +++ b/arch/powerpc/include/asm/vio.h @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include /* * Architecture-specific constants for drivers to diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index f4be0d724fc6..b40cec252905 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include /* * These give mapping size of each iommu pte/tlb. diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 4e370a5d8117..81da003df21b 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h index 5d52dc43dfe7..e438a00fbd63 100644 --- a/arch/xtensa/include/asm/pci.h +++ b/arch/xtensa/include/asm/pci.h @@ -33,7 +33,7 @@ extern struct pci_controller* pcibios_alloc_controller(void); #include #include -#include +#include #include #include diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c index 8b4e20a3f16c..b1eac719a4cc 100644 --- a/drivers/mmc/host/android-goldfish.c +++ b/drivers/mmc/host/android-goldfish.c @@ -42,10 +42,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/include/asm-generic/scatterlist.h b/include/asm-generic/scatterlist.h deleted file mode 100644 index 5de07355fad4..000000000000 --- a/include/asm-generic/scatterlist.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __ASM_GENERIC_SCATTERLIST_H -#define __ASM_GENERIC_SCATTERLIST_H - -#include - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; - dma_addr_t dma_address; -#ifdef CONFIG_NEED_SG_DMA_LENGTH - unsigned int dma_length; -#endif -}; - -/* - * These macros should be used after a dma_map_sg call has been done - * to get bus addresses of each of the SG entries and their lengths. - * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. 
- */ -#define sg_dma_address(sg) ((sg)->dma_address) - -#ifdef CONFIG_NEED_SG_DMA_LENGTH -#define sg_dma_len(sg) ((sg)->dma_length) -#else -#define sg_dma_len(sg) ((sg)->length) -#endif - -#endif /* __ASM_GENERIC_SCATTERLIST_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..504af1e65ce1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,8 +22,7 @@ #include #include #include - -#include +#include struct module; struct scsi_ioctl_command; diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 52456aa566a0..e1043f79122f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,8 +11,8 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include #include -#include struct device; diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index ed8f9e70df9b..eca1ec93775c 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -2,13 +2,39 @@ #define _LINUX_SCATTERLIST_H #include +#include #include #include - -#include -#include #include +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + unsigned int dma_length; +#endif +}; + +/* + * These macros should be used after a dma_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries dma_map_sg + * returns, or alternatively stop on the first sg_dma_len(sg) which + * is 0. + */ +#define sg_dma_address(sg) ((sg)->dma_address) + +#ifdef CONFIG_NEED_SG_DMA_LENGTH +#define sg_dma_len(sg) ((sg)->dma_length) +#else +#define sg_dma_len(sg) ((sg)->length) +#endif + struct sg_table { struct scatterlist *sgl; /* the list */ unsigned int nents; /* number of mapped entries */ @@ -18,10 +44,9 @@ struct sg_table { /* * Notes on SG table design. * - * Architectures must provide an unsigned long page_link field in the - * scatterlist struct. We use that to place the page pointer AND encode - * information about the sg table as well. The two lower bits are reserved - * for this information. + * We use the unsigned long page_link field in the scatterlist struct to place + * the page pointer AND encode information about the sg table as well. The two + * lower bits are reserved for this information. * * If bit 0 is set, then the page_link contains a pointer to the next sg * table list. Otherwise the next entry is at sg + 1. diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 4abda074ea45..341268841b31 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -29,10 +29,10 @@ #include #include #include +#include #include #include -#include #include #include -- cgit v1.2.3 From ccf73aaf5adfa37f45be12459c17f534e8f2c2c5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 30 Apr 2015 13:43:31 +0200 Subject: KVM: arm/mips/x86/power use __kvm_guest_{enter|exit} Use __kvm_guest_{enter|exit} instead of kvm_guest_{enter|exit} where interrupts are disabled. 
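For context, a minimal sketch of how the two flavours related at the time of this
series (simplified from include/linux/kvm_host.h; this helper is not part of the
patch below): the plain kvm_guest_enter()/kvm_guest_exit() wrappers save and
restore the interrupt flags around the double-underscore variants, so the call
sites converted here, which already run with interrupts disabled, can skip that
redundant flag juggling by calling the __ variants directly.

    /* sketch, assuming the 4.1-era kvm_host.h layout */
    static inline void kvm_guest_enter(void)
    {
            unsigned long flags;

            local_irq_save(flags);
            __kvm_guest_enter();    /* the irq-unsafe variant used below */
            local_irq_restore(flags);
    }
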
Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 4 ++-- arch/mips/kvm/mips.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index d9631ecddd56..e41cb11f71b2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -553,13 +553,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - kvm_guest_enter(); + __kvm_guest_enter(); vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; - kvm_guest_exit(); + __kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* * We may have taken a host interrupt in HYP mode (ie diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index bb68e8d520e8..71f345b499c8 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -393,7 +393,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_mips_deliver_interrupts(vcpu, kvm_read_c0_guest_cause(vcpu->arch.cop0)); - kvm_guest_enter(); + __kvm_guest_enter(); /* Disable hardware page table walking while in guest */ htw_stop(); @@ -403,7 +403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Re-enable HTW before enabling interrupts */ htw_start(); - kvm_guest_exit(); + __kvm_guest_exit(); local_irq_enable(); if (vcpu->sigset_active) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ac3ddf115f3d..8cd1f80fdc70 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -115,7 +115,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - kvm_guest_enter(); + __kvm_guest_enter(); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd03e29..7a959be0aebc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6347,7 +6347,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) smp_send_reschedule(vcpu->cpu); - kvm_guest_enter(); + __kvm_guest_enter(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); -- cgit v1.2.3 From e233d54d4d975760073700b15de975ef702e8194 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 30 Apr 2015 14:39:40 +0200 Subject: KVM: booke: use __kvm_guest_exit Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/booke.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6c1316a15a27..3872ab31c80a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1004,10 +1004,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - local_irq_enable(); - trace_kvm_exit(exit_nr, vcpu); - kvm_guest_exit(); + __kvm_guest_exit(); + + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; -- cgit v1.2.3 From 202648a6070b69d60c6d0926ff06c8863e231468 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 27 Apr 2015 21:48:47 +0900 Subject: powerpc: Constify irq_domain_ops The irq_domain_ops are not modified by the driver and the irqdomain core code accepts pointer to a const data. 
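As a hedged illustration of the core API the commit message refers to (the
prototype as it appears in include/linux/irqdomain.h around this time; not part
of this patch), the registration helpers already take the ops table through a
const pointer, so nothing in the irqdomain core ever writes to it:

    /* sketch of the relevant core prototype */
    struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
                                             unsigned int size,
                                             const struct irq_domain_ops *ops,
                                             void *host_data);

Marking each driver's ops table "static const" therefore costs nothing and lets
the compiler place the structure in read-only memory.
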
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/i8259.c | 2 +- arch/powerpc/sysdev/ipic.c | 2 +- arch/powerpc/sysdev/mpc8xx_pic.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/mv64x60_pic.c | 2 +- arch/powerpc/sysdev/qe_lib/qe_ic.c | 2 +- arch/powerpc/sysdev/tsi108_pci.c | 2 +- arch/powerpc/sysdev/uic.c | 2 +- arch/powerpc/sysdev/xics/xics-common.c | 2 +- arch/powerpc/sysdev/xilinx_intc.c | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 45598da0b321..31c33475c7b7 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -204,7 +204,7 @@ static int i8259_host_xlate(struct irq_domain *h, struct device_node *ct, return 0; } -static struct irq_domain_ops i8259_host_ops = { +static const struct irq_domain_ops i8259_host_ops = { .match = i8259_host_match, .map = i8259_host_map, .xlate = i8259_host_xlate, diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b28733727ed3..d78f1364b639 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -691,7 +691,7 @@ static int ipic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops ipic_host_ops = { +static const struct irq_domain_ops ipic_host_ops = { .match = ipic_host_match, .map = ipic_host_map, .xlate = irq_domain_xlate_onetwocell, diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index c4828c0be5bd..d93a78be4346 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -120,7 +120,7 @@ static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct, } -static struct irq_domain_ops mpc8xx_pic_host_ops = { +static const struct irq_domain_ops mpc8xx_pic_host_ops = { .map = mpc8xx_pic_host_map, .xlate = mpc8xx_pic_host_xlate, }; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index b2b8447a227a..c8e73332eaad 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1195,7 +1195,7 @@ static void mpic_cascade(unsigned int irq, struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -static struct irq_domain_ops mpic_host_ops = { +static const struct irq_domain_ops mpic_host_ops = { .match = mpic_host_match, .map = mpic_host_map, .xlate = mpic_host_xlate, diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c index 8848e99a83f2..0f842dd16bcd 100644 --- a/arch/powerpc/sysdev/mv64x60_pic.c +++ b/arch/powerpc/sysdev/mv64x60_pic.c @@ -223,7 +223,7 @@ static int mv64x60_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops mv64x60_host_ops = { +static const struct irq_domain_ops mv64x60_host_ops = { .map = mv64x60_host_map, }; diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 543765e1ef14..6512cd8caa51 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -271,7 +271,7 @@ static int qe_ic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops qe_ic_host_ops = { +static const struct irq_domain_ops qe_ic_host_ops = { .match = qe_ic_host_match, .map = qe_ic_host_map, .xlate = irq_domain_xlate_onetwocell, diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 188012c58f7f..57b54476e747 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ 
-397,7 +397,7 @@ static int pci_irq_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops pci_irq_domain_ops = { +static const struct irq_domain_ops pci_irq_domain_ops = { .map = pci_irq_host_map, .xlate = pci_irq_host_xlate, }; diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 7c37157d4c24..1cd057f11725 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -189,7 +189,7 @@ static int uic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops uic_host_ops = { +static const struct irq_domain_ops uic_host_ops = { .map = uic_host_map, .xlate = irq_domain_xlate_twocell, }; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 878a54036a25..5bc5889d4acc 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -360,7 +360,7 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, return 0; } -static struct irq_domain_ops xics_host_ops = { +static const struct irq_domain_ops xics_host_ops = { .match = xics_host_match, .map = xics_host_map, .xlate = xics_host_xlate, diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 56f0524e47a6..43b8b275bc5c 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -179,7 +179,7 @@ static int xilinx_intc_map(struct irq_domain *h, unsigned int virq, return 0; } -static struct irq_domain_ops xilinx_intc_ops = { +static const struct irq_domain_ops xilinx_intc_ops = { .map = xilinx_intc_map, .xlate = xilinx_intc_xlate, }; -- cgit v1.2.3 From 2222ce0fbbcc4ebfa9995c8d23d72c8239ad712c Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 29 Apr 2015 20:44:58 -0500 Subject: powerpc/pseries: Fix possible leaked device node reference Failure return from dlpar_configure_connector when dlpar adding cpus results in leaking references to the cpus parent device node. Move the call to of_node_put() prior to checking the result of dlpar_configure_connector. Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware") Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 019d34aaf054..47d9cebe7159 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -421,11 +421,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) return -ENODEV; dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); + of_node_put(parent); if (!dn) return -EINVAL; - of_node_put(parent); - rc = dlpar_attach_node(dn); if (rc) { dlpar_release_drc(drc_index); -- cgit v1.2.3 From b130e7c04f1130f241eddf72098a6e08d41d08fe Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 7 May 2015 13:49:12 -0400 Subject: powerpc: export of_get_ibm_chip_id function Export the of_get_ibm_chip_id() function. This will be used by the PowerNV NX-842 driver. 
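For illustration, a hypothetical module-side call site (not taken from the
NX-842 driver itself): of_get_ibm_chip_id() walks the given node and its parents
looking for an "ibm,chip-id" property and returns its value, or -1 if none is
found, so a driver built as a module could do something like:

    /* hypothetical usage; "pdev" stands in for the driver's platform device */
    int chip_id = of_get_ibm_chip_id(pdev->dev.of_node);

    if (chip_id < 0)
            return -ENODEV; /* no ibm,chip-id anywhere in the ancestry */

Without the EXPORT_SYMBOL() added below, that call would link only for built-in
code, not for modules.
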
Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- arch/powerpc/kernel/prom.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 308c5e15676b..ea2cea7eaef1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -800,6 +800,7 @@ int of_get_ibm_chip_id(struct device_node *np) } return -1; } +EXPORT_SYMBOL(of_get_ibm_chip_id); /** * cpu_to_chip_id - Return the cpus chip-id -- cgit v1.2.3 From edc424f8cd84bbae530d8a9f86caf1923606fb24 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 7 May 2015 13:49:13 -0400 Subject: powerpc: Add ICSWX instruction Add the asm ICSWX and ICSWEPX opcodes. Add definitions for the Coprocessor Request structures needed to use the icswx calls to coprocessors. Add icswx() function to perform the ICSWX asm using the provided Coprocessor Command Word value and Coprocessor Request Block structure. This is required for communication with the NX-842 coprocessor on a PowerNV system. Signed-off-by: Dan Streetman Signed-off-by: Herbert Xu --- arch/powerpc/include/asm/icswx.h | 184 ++++++++++++++++++++++++++++++++++ arch/powerpc/include/asm/ppc-opcode.h | 13 +++ 2 files changed, 197 insertions(+) create mode 100644 arch/powerpc/include/asm/icswx.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h new file mode 100644 index 000000000000..9f8402b35115 --- /dev/null +++ b/arch/powerpc/include/asm/icswx.h @@ -0,0 +1,184 @@ +/* + * ICSWX api + * + * Copyright (C) 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This provides the Initiate Coprocessor Store Word Indexed (ICSWX) + * instruction. This instruction is used to communicate with PowerPC + * coprocessors. This also provides definitions of the structures used + * to communicate with the coprocessor. + * + * The RFC02130: Coprocessor Architecture document is the reference for + * everything in this file unless otherwise noted. 
+ */ +#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ +#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ + +#include /* for PPC_ICSWX */ + +/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */ + +#define CCB_VALUE (0x3fffffffffffffff) +#define CCB_ADDRESS (0xfffffffffffffff8) +#define CCB_CM (0x0000000000000007) +#define CCB_CM0 (0x0000000000000004) +#define CCB_CM12 (0x0000000000000003) + +#define CCB_CM0_ALL_COMPLETIONS (0x0) +#define CCB_CM0_LAST_IN_CHAIN (0x4) +#define CCB_CM12_STORE (0x0) +#define CCB_CM12_INTERRUPT (0x1) + +#define CCB_SIZE (0x10) +#define CCB_ALIGN CCB_SIZE + +struct coprocessor_completion_block { + __be64 value; + __be64 address; +} __packed __aligned(CCB_ALIGN); + + +/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */ + +#define CSB_V (0x80) +#define CSB_F (0x04) +#define CSB_CH (0x03) +#define CSB_CE_INCOMPLETE (0x80) +#define CSB_CE_TERMINATION (0x40) +#define CSB_CE_TPBC (0x20) + +#define CSB_CC_SUCCESS (0) +#define CSB_CC_INVALID_ALIGN (1) +#define CSB_CC_OPERAND_OVERLAP (2) +#define CSB_CC_DATA_LENGTH (3) +#define CSB_CC_TRANSLATION (5) +#define CSB_CC_PROTECTION (6) +#define CSB_CC_RD_EXTERNAL (7) +#define CSB_CC_INVALID_OPERAND (8) +#define CSB_CC_PRIVILEGE (9) +#define CSB_CC_INTERNAL (10) +#define CSB_CC_WR_EXTERNAL (12) +#define CSB_CC_NOSPC (13) +#define CSB_CC_EXCESSIVE_DDE (14) +#define CSB_CC_WR_TRANSLATION (15) +#define CSB_CC_WR_PROTECTION (16) +#define CSB_CC_UNKNOWN_CODE (17) +#define CSB_CC_ABORT (18) +#define CSB_CC_TRANSPORT (20) +#define CSB_CC_SEGMENTED_DDL (31) +#define CSB_CC_PROGRESS_POINT (32) +#define CSB_CC_DDE_OVERFLOW (33) +#define CSB_CC_SESSION (34) +#define CSB_CC_PROVISION (36) +#define CSB_CC_CHAIN (37) +#define CSB_CC_SEQUENCE (38) +#define CSB_CC_HW (39) + +#define CSB_SIZE (0x10) +#define CSB_ALIGN CSB_SIZE + +struct coprocessor_status_block { + u8 flags; + u8 cs; + u8 cc; + u8 ce; + __be32 count; + __be64 address; +} __packed __aligned(CSB_ALIGN); + + +/* Chapter 6.5.10 Data-Descriptor List (DDL) + * each list contains one or more Data-Descriptor Entries (DDE) + */ + +#define DDE_P (0x8000) + +#define DDE_SIZE (0x10) +#define DDE_ALIGN DDE_SIZE + +struct data_descriptor_entry { + __be16 flags; + u8 count; + u8 index; + __be32 length; + __be64 address; +} __packed __aligned(DDE_ALIGN); + + +/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */ + +#define CRB_SIZE (0x80) +#define CRB_ALIGN (0x100) /* Errata: requires 256 alignment */ + +/* Coprocessor Status Block field + * ADDRESS address of CSB + * C CCB is valid + * AT 0 = addrs are virtual, 1 = addrs are phys + * M enable perf monitor + */ +#define CRB_CSB_ADDRESS (0xfffffffffffffff0) +#define CRB_CSB_C (0x0000000000000008) +#define CRB_CSB_AT (0x0000000000000002) +#define CRB_CSB_M (0x0000000000000001) + +struct coprocessor_request_block { + __be32 ccw; + __be32 flags; + __be64 csb_addr; + + struct data_descriptor_entry source; + struct data_descriptor_entry target; + + struct coprocessor_completion_block ccb; + + u8 reserved[48]; + + struct coprocessor_status_block csb; +} __packed __aligned(CRB_ALIGN); + + +/* RFC02167 Initiate Coprocessor Instructions document + * Chapter 8.2.1.1.1 RS + * Chapter 8.2.3 Coprocessor Directive + * Chapter 8.2.4 Execution + * + * The CCW must be converted to BE before passing to icswx() + */ + +#define CCW_PS (0xff000000) +#define CCW_CT (0x00ff0000) +#define CCW_CD (0x0000ffff) +#define CCW_CL (0x0000c000) + + +/* RFC02167 Initiate Coprocessor Instructions document + * Chapter 8.2.1 Initiate Coprocessor Store Word Indexed (ICSWX) + * Chapter 8.2.4.1 
Condition Register 0 + */ + +#define ICSWX_INITIATED (0x8) +#define ICSWX_BUSY (0x4) +#define ICSWX_REJECTED (0x2) + +static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb) +{ + __be64 ccw_reg = ccw; + u32 cr; + + __asm__ __volatile__( + PPC_ICSWX(%1,0,%2) "\n" + "mfcr %0\n" + : "=r" (cr) + : "r" (ccw_reg), "r" (crb) + : "cr0", "memory"); + + return (int)((cr >> 28) & 0xf); +} + + +#endif /* _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 5c93f691b495..8452335661a5 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -136,6 +136,8 @@ #define PPC_INST_DCBAL 0x7c2005ec #define PPC_INST_DCBZL 0x7c2007ec #define PPC_INST_ICBT 0x7c00002c +#define PPC_INST_ICSWX 0x7c00032d +#define PPC_INST_ICSWEPX 0x7c00076d #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 @@ -403,4 +405,15 @@ #define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \ TMRN(tmr) | ___PPC_RT(r)) +/* Coprocessor instructions */ +#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_INST_ICSWX | \ + ___PPC_RS(s) | \ + ___PPC_RA(a) | \ + ___PPC_RB(b)) +#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_INST_ICSWEPX | \ + ___PPC_RS(s) | \ + ___PPC_RA(a) | \ + ___PPC_RB(b)) + + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ -- cgit v1.2.3 From f1e7c202a98cb87cc650d99d014f87e6248ae530 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Mar 2015 20:11:56 +1100 Subject: powerpc: Make STRICT_MM_TYPECHECKS a config option The STRICT_MM_TYPECHECKS code has bit-rotted over the years. To make it possible to easily build test it, make it a CONFIG option. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig.debug | 8 ++++++++ arch/powerpc/include/asm/page.h | 4 +--- arch/powerpc/include/asm/pgtable-ppc64.h | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0efa8f90a8f1..3a510f4a6b68 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -19,6 +19,14 @@ config PPC_WERROR depends on !PPC_DISABLE_WERROR default y +config STRICT_MM_TYPECHECKS + bool "Do extra type checking on mm types" + default n + help + This option turns on extra type checking for some mm related types. + + If you don't know what this means, say N. + config PRINT_STACK_DEPTH int "Stack depth to print" if DEBUG_KERNEL default 64 diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 69c059887a2c..71294a6e976e 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -278,9 +278,7 @@ extern long long virt_phys_offset; #ifndef __ASSEMBLY__ -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS +#ifdef CONFIG_STRICT_MM_TYPECHECKS /* These are used to make use of C type-checking. 
*/ /* PTE level */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 43e6ad424c7f..f951d9cf358a 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -118,7 +118,7 @@ */ #ifndef __real_pte -#ifdef STRICT_MM_TYPECHECKS +#ifdef CONFIG_STRICT_MM_TYPECHECKS #define __real_pte(e,p) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) #else -- cgit v1.2.3 From 5af7a6f3e2d015dcaaeffa48c6d47238415cbe66 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 10 Apr 2015 11:52:06 +1000 Subject: powerpc/pasemi: Only the build the pasemi MSI code for PASEMI=y The pasemi MSI code is currently always built when MPIC=y && PCI_MSI=y. It should not have any effect on other platforms, because it immediately checks the MPIC's compatible property for "pasemi,pwrficient-openpic". However it's odd that it's still built even when PASEMI=n. It also needn't be in sysdev, as it's only used by pasemi. So move it into platforms/pasemi, whereby it will only be built for PASEMI=y. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/Makefile | 1 + arch/powerpc/platforms/pasemi/msi.c | 165 ++++++++++++++++++++++++++++++++ arch/powerpc/sysdev/Makefile | 2 +- arch/powerpc/sysdev/mpic.h | 10 +- arch/powerpc/sysdev/mpic_pasemi_msi.c | 167 --------------------------------- 5 files changed, 172 insertions(+), 173 deletions(-) create mode 100644 arch/powerpc/platforms/pasemi/msi.c delete mode 100644 arch/powerpc/sysdev/mpic_pasemi_msi.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile index 8e8d4cae5ebe..60b4e0fd9808 100644 --- a/arch/powerpc/platforms/pasemi/Makefile +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -1,2 +1,3 @@ obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o +obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c new file mode 100644 index 000000000000..0b3706604543 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -0,0 +1,165 @@ +/* + * Copyright 2007, Olof Johansson, PA Semi + * + * Based on arch/powerpc/sysdev/mpic_u3msi.c: + * + * Copyright 2006, Segher Boessenkool, IBM Corporation. + * Copyright 2006-2007, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Allocate 16 interrupts per device, to give an alignment of 16, + * since that's the size of the grouping w.r.t. affinity. If someone + * needs more than 32 MSI's down the road we'll have to rethink this, + * but it should be OK for now. + */ +#define ALLOC_CHUNK 16 + +#define PASEMI_MSI_ADDR 0xfc080000 + +/* A bit ugly, can we get this from the pci_dev somehow? 
*/ +static struct mpic *msi_mpic; + + +static void mpic_pasemi_msi_mask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); + pci_msi_mask_irq(data); + mpic_mask_irq(data); +} + +static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); + mpic_unmask_irq(data); + pci_msi_unmask_irq(data); +} + +static struct irq_chip mpic_pasemi_msi_chip = { + .irq_shutdown = mpic_pasemi_msi_mask_irq, + .irq_mask = mpic_pasemi_msi_mask_irq, + .irq_unmask = mpic_pasemi_msi_unmask_irq, + .irq_eoi = mpic_end_irq, + .irq_set_type = mpic_set_irq_type, + .irq_set_affinity = mpic_set_affinity, + .name = "PASEMI-MSI", +}; + +static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct msi_desc *entry; + + pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); + + list_for_each_entry(entry, &pdev->msi_list, list) { + if (entry->irq == NO_IRQ) + continue; + + irq_set_msi_desc(entry->irq, NULL); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + virq_to_hw(entry->irq), ALLOC_CHUNK); + irq_dispose_mapping(entry->irq); + } + + return; +} + +static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int virq; + struct msi_desc *entry; + struct msi_msg msg; + int hwirq; + + if (type == PCI_CAP_ID_MSIX) + pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); + pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", + pdev, nvec, type); + + msg.address_hi = 0; + msg.address_lo = PASEMI_MSI_ADDR; + + list_for_each_entry(entry, &pdev->msi_list, list) { + /* Allocate 16 interrupts for now, since that's the grouping for + * affinity. This can be changed later if it turns out 32 is too + * few MSIs for someone, but restrictions will apply to how the + * sources can be changed independently. + */ + hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, + ALLOC_CHUNK); + if (hwirq < 0) { + pr_debug("pasemi_msi: failed allocating hwirq\n"); + return hwirq; + } + + virq = irq_create_mapping(msi_mpic->irqhost, hwirq); + if (virq == NO_IRQ) { + pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", + hwirq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, + ALLOC_CHUNK); + return -ENOSPC; + } + + /* Vector on MSI is really an offset, the hardware adds + * it to the value written at the magic address. So set + * it to 0 to remain sane. 
+ */ + mpic_set_vector(virq, 0); + + irq_set_msi_desc(virq, entry); + irq_set_chip(virq, &mpic_pasemi_msi_chip); + irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); + + pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \ + "addr 0x%x\n", virq, hwirq, msg.address_lo); + + /* Likewise, the device writes [0...511] into the target + * register to generate MSI [512...1023] + */ + msg.data = hwirq-0x200; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +int mpic_pasemi_msi_init(struct mpic *mpic) +{ + int rc; + + if (!mpic->irqhost->of_node || + !of_device_is_compatible(mpic->irqhost->of_node, + "pasemi,pwrficient-openpic")) + return -ENODEV; + + rc = mpic_msi_init_allocator(mpic); + if (rc) { + pr_debug("pasemi_msi: Error allocating bitmap!\n"); + return rc; + } + + pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); + + msi_mpic = mpic; + WARN_ON(ppc_md.setup_msi_irqs); + ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; + ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + + return 0; +} diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index f7cb2a1b01fa..5b492a6438ff 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -2,7 +2,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o +mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) obj-$(CONFIG_MPIC_TIMER) += mpic_timer.o obj-$(CONFIG_FSL_MPIC_TIMER_WAKEUP) += fsl_mpic_timer_wakeup.o diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 24bf07a63924..32971a41853b 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -15,7 +15,6 @@ extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq); extern int mpic_msi_init_allocator(struct mpic *mpic); extern int mpic_u3msi_init(struct mpic *mpic); -extern int mpic_pasemi_msi_init(struct mpic *mpic); #else static inline void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq) @@ -27,11 +26,12 @@ static inline int mpic_u3msi_init(struct mpic *mpic) { return -1; } +#endif -static inline int mpic_pasemi_msi_init(struct mpic *mpic) -{ - return -1; -} +#if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI) +int mpic_pasemi_msi_init(struct mpic *mpic); +#else +static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; } #endif extern int mpic_set_irq_type(struct irq_data *d, unsigned int flow_type); diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c deleted file mode 100644 index a3f660eed6de..000000000000 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2007, Olof Johansson, PA Semi - * - * Based on arch/powerpc/sysdev/mpic_u3msi.c: - * - * Copyright 2006, Segher Boessenkool, IBM Corporation. - * Copyright 2006-2007, Michael Ellerman, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 of the - * License. - * - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#include "mpic.h" - -/* Allocate 16 interrupts per device, to give an alignment of 16, - * since that's the size of the grouping w.r.t. affinity. 
If someone - * needs more than 32 MSI's down the road we'll have to rethink this, - * but it should be OK for now. - */ -#define ALLOC_CHUNK 16 - -#define PASEMI_MSI_ADDR 0xfc080000 - -/* A bit ugly, can we get this from the pci_dev somehow? */ -static struct mpic *msi_mpic; - - -static void mpic_pasemi_msi_mask_irq(struct irq_data *data) -{ - pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); - pci_msi_mask_irq(data); - mpic_mask_irq(data); -} - -static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) -{ - pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); - mpic_unmask_irq(data); - pci_msi_unmask_irq(data); -} - -static struct irq_chip mpic_pasemi_msi_chip = { - .irq_shutdown = mpic_pasemi_msi_mask_irq, - .irq_mask = mpic_pasemi_msi_mask_irq, - .irq_unmask = mpic_pasemi_msi_unmask_irq, - .irq_eoi = mpic_end_irq, - .irq_set_type = mpic_set_irq_type, - .irq_set_affinity = mpic_set_affinity, - .name = "PASEMI-MSI", -}; - -static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) -{ - struct msi_desc *entry; - - pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); - - list_for_each_entry(entry, &pdev->msi_list, list) { - if (entry->irq == NO_IRQ) - continue; - - irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); - irq_dispose_mapping(entry->irq); - } - - return; -} - -static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - unsigned int virq; - struct msi_desc *entry; - struct msi_msg msg; - int hwirq; - - if (type == PCI_CAP_ID_MSIX) - pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); - pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", - pdev, nvec, type); - - msg.address_hi = 0; - msg.address_lo = PASEMI_MSI_ADDR; - - list_for_each_entry(entry, &pdev->msi_list, list) { - /* Allocate 16 interrupts for now, since that's the grouping for - * affinity. This can be changed later if it turns out 32 is too - * few MSIs for someone, but restrictions will apply to how the - * sources can be changed independently. - */ - hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, - ALLOC_CHUNK); - if (hwirq < 0) { - pr_debug("pasemi_msi: failed allocating hwirq\n"); - return hwirq; - } - - virq = irq_create_mapping(msi_mpic->irqhost, hwirq); - if (virq == NO_IRQ) { - pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", - hwirq); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, - ALLOC_CHUNK); - return -ENOSPC; - } - - /* Vector on MSI is really an offset, the hardware adds - * it to the value written at the magic address. So set - * it to 0 to remain sane. 
- */ - mpic_set_vector(virq, 0); - - irq_set_msi_desc(virq, entry); - irq_set_chip(virq, &mpic_pasemi_msi_chip); - irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); - - pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \ - "addr 0x%x\n", virq, hwirq, msg.address_lo); - - /* Likewise, the device writes [0...511] into the target - * register to generate MSI [512...1023] - */ - msg.data = hwirq-0x200; - pci_write_msi_msg(virq, &msg); - } - - return 0; -} - -int mpic_pasemi_msi_init(struct mpic *mpic) -{ - int rc; - - if (!mpic->irqhost->of_node || - !of_device_is_compatible(mpic->irqhost->of_node, - "pasemi,pwrficient-openpic")) - return -ENODEV; - - rc = mpic_msi_init_allocator(mpic); - if (rc) { - pr_debug("pasemi_msi: Error allocating bitmap!\n"); - return rc; - } - - pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); - - msi_mpic = mpic; - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; - - return 0; -} -- cgit v1.2.3 From 5c0aebf6e101e9ea6ddea7aaba13582c89206333 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 15 Apr 2015 12:17:07 +1000 Subject: powerpc: Show utsname->machine in boot-up banner Currently we print "Starting Linux PPC64" at boot. But we don't mention anywhere whether the kernel is big or little endian. If we print the utsname->machine value instead we get either "ppc64" or "ppc64le" which is much more informative, eg: Starting Linux ppc64le #1 SMP Wed Apr 15 12:12:20 AEST 2015 Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index c69671c03c3b..9fe3dcdbfca7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -523,7 +523,8 @@ void __init setup_system(void) smp_release_cpus(); #endif - pr_info("Starting Linux PPC64 %s\n", init_utsname()->version); + pr_info("Starting Linux %s %s\n", init_utsname()->machine, + init_utsname()->version); pr_info("-----------------------------------------------------\n"); pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); -- cgit v1.2.3 From e79c8385c878abdf90290cba6bea08fd60058562 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 21 Apr 2015 22:26:57 +1000 Subject: powerpc: Don't do gcc version checks if we're building with clang We have several checks for bad gcc versions in our Makefile. These don't apply if we're building with clang, so skip them in that case. The obvious check would be for ${COMPILER} = "gcc", but because of the way the logic in the top level Makefile conditionally sets COMPILER, it's possible that we're building with gcc but COMPILER was not set. So instead check for ${COMPILER} != "clang", which we know is currently the only other possibility. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 07a480861f78..3609504b0ad2 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -314,7 +314,8 @@ TOUT := .tmp_gas_check # - Require gcc 4.0 or above on 64-bit # - gcc-4.2.0 has issues compiling modules on 64-bit checkbin: - @if test "$(cc-version)" = "0304" ; then \ + @if test "${COMPILER}" != "clang" \ + && test "$(cc-version)" = "0304" ; then \ if ! 
/bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \ echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \ echo 'correctly with gcc-3.4 and your version of binutils.'; \ @@ -322,13 +323,15 @@ checkbin: false; \ fi ; \ fi - @if test "$(cc-version)" -lt "0400" \ + @if test "${COMPILER}" != "clang" \ + && test "$(cc-version)" -lt "0400" \ && test "x${CONFIG_PPC64}" = "xy" ; then \ echo -n "Sorry, GCC v4.0 or above is required to build " ; \ echo "the 64-bit powerpc kernel." ; \ false ; \ fi - @if test "$(cc-fullversion)" = "040200" \ + @if test "${COMPILER}" != "clang" \ + && test "$(cc-fullversion)" = "040200" \ && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \ echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \ echo 'kernel with modules enabled.' ; \ -- cgit v1.2.3 From 60e065f70bdb0b0e916389024922ad40f3270c96 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 23 Apr 2015 17:27:12 +1000 Subject: powerpc: Reject binutils 2.24 when building little endian There is a bug in binutils 2.24 which causes miscompilation if we're building little endian and using weak symbols (which the kernel does). It is fixed in binutils commit 57fa7b8c7e59 "Correct elf_merge_st_other arguments for weak symbols", which is in binutils 2.25 and has been backported to the binutils 2.24 branch and has been picked up by most distros it seems. However if we're running stock 2.24 (no extra version) then the bug is present, so check for that and bail. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3609504b0ad2..a314cb024c8b 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -339,6 +339,14 @@ checkbin: echo 'disable kernel modules' ; \ false ; \ fi + @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ + && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \ + echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \ + echo 'in some circumstances.' ; \ + echo -n '*** Please use a different binutils version.' ; \ + false ; \ + fi + CLEAN_FILES += $(TOUT) -- cgit v1.2.3 From 63da88dd48268af415b3677e20f6bcb92c1bf32c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Apr 2015 15:40:34 +1000 Subject: powerpc/vdso: Remove unused debug code It's in the git history if we ever need it back. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso.c | 44 -------------------------------------------- 1 file changed, 44 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 305eb0d9b768..869fd0c53280 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -140,50 +140,6 @@ struct lib64_elfinfo }; -#ifdef __DEBUG -static void dump_one_vdso_page(struct page *pg, struct page *upg) -{ - printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), - page_count(pg), - pg->flags); - if (upg && !IS_ERR(upg) /* && pg != upg*/) { - printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) - << PAGE_SHIFT), - page_count(upg), - upg->flags); - } - printk("\n"); -} - -static void dump_vdso_pages(struct vm_area_struct * vma) -{ - int i; - - if (!vma || is_32bit_task()) { - printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); - for (i=0; ivm_mm) ? 
- follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) - : NULL; - dump_one_vdso_page(pg, upg); - } - } - if (!vma || !is_32bit_task()) { - printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); - for (i=0; ivm_mm) ? - follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0) - : NULL; - dump_one_vdso_page(pg, upg); - } - } -} -#endif /* DEBUG */ - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree -- cgit v1.2.3 From 6e5c077519f2661c56647fc21b2ec2ba0a50bb75 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Apr 2015 15:40:35 +1000 Subject: powerpc/vdso: Combine start/size variables In vdso_fixup_features() we have start64/start32 and size64/size32, but they have the same types, ie. void * and unsigned long. They're only used to save the return value from find_sectionXX() for the subsequent call to do_feature_fixups(), so there's no overlap in their usage either. So we can just consolidate them into start/size and avoid the duplication. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso.c | 55 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 869fd0c53280..8331d0bef0fb 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -525,55 +525,52 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, static __init int vdso_fixup_features(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { - void *start32; - unsigned long size32; + unsigned long size; + void *start; #ifdef CONFIG_PPC64 - void *start64; - unsigned long size64; - - start64 = find_section64(v64->hdr, "__ftr_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->cpu_features, - start64, start64 + size64); + start, start + size); - start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->mmu_features, - start64, start64 + size64); + start, start + size); - start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__fw_ftr_fixup", &size); + if (start) do_feature_fixups(powerpc_firmware_features, - start64, start64 + size64); + start, start + size); - start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64); - if (start64) + start = find_section64(v64->hdr, "__lwsync_fixup", &size); + if (start) do_lwsync_fixups(cur_cpu_spec->cpu_features, - start64, start64 + size64); + start, start + size); #endif /* CONFIG_PPC64 */ - start32 = find_section32(v32->hdr, "__ftr_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->cpu_features, - start32, start32 + size32); + start, start + size); - start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size); + if (start) do_feature_fixups(cur_cpu_spec->mmu_features, - start32, start32 + size32); + start, start + size); #ifdef CONFIG_PPC64 - start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__fw_ftr_fixup", &size); + if (start) do_feature_fixups(powerpc_firmware_features, - start32, start32 + size32); + start, start + size); #endif /* CONFIG_PPC64 */ - 
start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32); - if (start32) + start = find_section32(v32->hdr, "__lwsync_fixup", &size); + if (start) do_lwsync_fixups(cur_cpu_spec->cpu_features, - start32, start32 + size32); + start, start + size); return 0; } -- cgit v1.2.3 From e0d0059169945c8ee16790d2e7244cea397dfd56 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 May 2015 20:01:02 +1000 Subject: powerpc/vdso: Disable building the 32-bit VDSO on little endian The only little endian configuration we support is ppc64le. As such if we're building little endian we don't need a 32-bit VDSO, because there is no 32-bit userspace. This patch is a fairly ugly mess of #ifdefs, but is the minimal logic required to disable the 32-bit VDSO. We can hopefully clean up the result in future with some further refactoring. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/Makefile | 3 ++- arch/powerpc/kernel/vdso.c | 36 ++++++++++++++++++++++++++++++++-- arch/powerpc/platforms/Kconfig.cputype | 10 ++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c1ebbdaac28f..87c7d1473488 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -33,11 +33,12 @@ obj-y := cputable.o ptrace.o syscalls.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o vdso32/ \ + misc_$(CONFIG_WORD_SIZE).o \ of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o +obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 8331d0bef0fb..b457bfa28436 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -49,13 +49,16 @@ /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -extern char vdso32_start, vdso32_end; -static void *vdso32_kbase = &vdso32_start; static unsigned int vdso32_pages; +static void *vdso32_kbase; static struct page **vdso32_pagelist; unsigned long vdso32_sigtramp; unsigned long vdso32_rt_sigtramp; +#ifdef CONFIG_VDSO32 +extern char vdso32_start, vdso32_end; +#endif + #ifdef CONFIG_PPC64 extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; @@ -248,6 +251,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) +#ifdef CONFIG_VDSO32 static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, unsigned long *size) { @@ -335,6 +339,20 @@ static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, return 0; } +#else /* !CONFIG_VDSO32 */ +static unsigned long __init find_function32(struct lib32_elfinfo *lib, + const char *symname) +{ + return 0; +} + +static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64, + const char *orig, const char *fix) +{ + return 0; +} +#endif /* CONFIG_VDSO32 */ #ifdef CONFIG_PPC64 @@ -445,6 +463,7 @@ static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, * Locate symbol tables & text section */ +#ifdef CONFIG_VDSO32 v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); if (v32->dynsym == NULL || v32->dynstr == NULL) { @@ -457,6 +476,7 @@ static __init int 
vdso_do_find_sections(struct lib32_elfinfo *v32, return -1; } v32->text = sect - vdso32_kbase; +#endif #ifdef CONFIG_PPC64 v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); @@ -493,7 +513,9 @@ static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { +#ifdef CONFIG_VDSO32 Elf32_Sym *sym32; +#endif #ifdef CONFIG_PPC64 Elf64_Sym *sym64; @@ -508,6 +530,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, (sym64->st_value - VDSO64_LBASE); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_VDSO32 sym32 = find_symbol32(v32, "__kernel_datapage_offset"); if (sym32 == NULL) { printk(KERN_ERR "vDSO32: Can't find symbol " @@ -517,6 +540,7 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = (vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE); +#endif return 0; } @@ -550,6 +574,7 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, start, start + size); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_VDSO32 start = find_section32(v32->hdr, "__ftr_fixup", &size); if (start) do_feature_fixups(cur_cpu_spec->cpu_features, @@ -571,6 +596,7 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32, if (start) do_lwsync_fixups(cur_cpu_spec->cpu_features, start, start + size); +#endif return 0; } @@ -732,11 +758,15 @@ static int __init vdso_init(void) #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_VDSO32 + vdso32_kbase = &vdso32_start; + /* * Calculate the size of the 32 bits vDSO */ vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); +#endif /* @@ -757,6 +787,7 @@ static int __init vdso_init(void) return 0; } +#ifdef CONFIG_VDSO32 /* Make sure pages are in the correct state */ vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2), GFP_KERNEL); @@ -769,6 +800,7 @@ static int __init vdso_init(void) } vdso32_pagelist[i++] = virt_to_page(vdso_data); vdso32_pagelist[i] = NULL; +#endif #ifdef CONFIG_PPC64 vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2), diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7264e91190be..724ecc791404 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -405,6 +405,16 @@ config PPC_DOORBELL endmenu +config VDSO32 + def_bool y + depends on PPC32 || CPU_BIG_ENDIAN + help + This symbol controls whether we build the 32-bit VDSO. We obviously + want to do that if we're building a 32-bit kernel. If we're building + a 64-bit kernel then we only want a 32-bit VDSO if we're building for + big endian. That is because the only little endian configuration we + support is ppc64le which is 64-bit only. + choice prompt "Endianness selection" default CPU_BIG_ENDIAN -- cgit v1.2.3 From 38c0488770983b2654f075540cc1fc71f55b6df5 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 30 Apr 2015 13:50:07 +1000 Subject: powerpc/powernv: Silence SYSPARAM warning on boot OpenPower BMC machines do not place any sysparams in the device tree, so at every boot we get a warning: [ 0.437176] SYSPARAM: Opal sysparam node not found Remove the warning, and reorder the init so we don't peform allocations when there is no sysparam node in the device tree. 
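In outline, the reordered init probes for the optional device tree node first and bails out quietly when it is absent, so the kobject creation and buffer allocation only happen once the node is known to exist. A simplified sketch of the resulting flow (illustrative only, not the literal function body; the full diff follows):

	void __init opal_sys_param_init(void)
	{
		struct device_node *sysparam;

		/* Some systems do not use sysparams; this is not an error */
		sysparam = of_find_node_by_path("/ibm,opal/sysparams");
		if (!sysparam)
			return;

		/*
		 * Only now create the sysparams kobject and allocate the
		 * get/set buffer, so nothing is set up (and nothing is
		 * printed) on machines without the node.
		 */
	}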
Signed-off-by: Joel Stanley Acked-by: Neelesh Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-sysparam.c | 31 +++++++++++++------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 9d1acf22a099..2e52b47393e7 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -162,10 +162,20 @@ void __init opal_sys_param_init(void) goto out; } + /* Some systems do not use sysparams; this is not an error */ + sysparam = of_find_node_by_path("/ibm,opal/sysparams"); + if (!sysparam) + goto out; + + if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { + pr_err("SYSPARAM: Opal sysparam node not compatible\n"); + goto out_node_put; + } + sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj); if (!sysparam_kobj) { pr_err("SYSPARAM: Failed to create sysparam kobject\n"); - goto out; + goto out_node_put; } /* Allocate big enough buffer for any get/set transactions */ @@ -176,30 +186,19 @@ void __init opal_sys_param_init(void) goto out_kobj_put; } - sysparam = of_find_node_by_path("/ibm,opal/sysparams"); - if (!sysparam) { - pr_err("SYSPARAM: Opal sysparam node not found\n"); - goto out_param_buf; - } - - if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { - pr_err("SYSPARAM: Opal sysparam node not compatible\n"); - goto out_node_put; - } - /* Number of parameters exposed through DT */ count = of_property_count_strings(sysparam, "param-name"); if (count < 0) { pr_err("SYSPARAM: No string found of property param-name in " "the node %s\n", sysparam->name); - goto out_node_put; + goto out_param_buf; } id = kzalloc(sizeof(*id) * count, GFP_KERNEL); if (!id) { pr_err("SYSPARAM: Failed to allocate memory to read parameter " "id\n"); - goto out_node_put; + goto out_param_buf; } size = kzalloc(sizeof(*size) * count, GFP_KERNEL); @@ -293,12 +292,12 @@ out_free_size: kfree(size); out_free_id: kfree(id); -out_node_put: - of_node_put(sysparam); out_param_buf: kfree(param_data_buf); out_kobj_put: kobject_put(sysparam_kobj); +out_node_put: + of_node_put(sysparam); out: return; } -- cgit v1.2.3 From ed3e81ff2016b180d8f439a1981f2c8b59b0b53c Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:07 +1100 Subject: powerpc/eeh: Move PE state constants around There are two equivalent sets of PE state constants, defined in arch/powerpc/include/asm/eeh.h and include/uapi/linux/vfio.h. Though the names are different, their corresponding values are exactly same. The former is used by EEH core and the latter is used by userspace. The patch moves those constants from arch/powerpc/include/asm/eeh.h to arch/powerpc/include/uapi/asm/eeh.h, which are expected to be used by userspace from now on. We can't delete those constants in vfio.h as it's uncertain that those constants have been or will be used by userspace. 
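With the constants exported, userspace can test PE states by name (typically via <asm/eeh.h> once the uapi headers are installed) rather than hard-coding the values that also live in vfio.h. A purely hypothetical fragment with made-up helper names, only to show the intent:

	#include <asm/eeh.h>

	switch (pe_state) {
	case EEH_PE_STATE_NORMAL:
		break;				/* PE is healthy, nothing to do */
	case EEH_PE_STATE_RESET:
	case EEH_PE_STATE_STOPPED_IO_DMA:
	case EEH_PE_STATE_STOPPED_DMA:
		recover_pe();			/* hypothetical helper */
		break;
	case EEH_PE_STATE_UNAVAIL:
		report_unavailable();		/* hypothetical helper */
		break;
	}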
Suggested-by: David Gibson Signed-off-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 7 ++----- arch/powerpc/include/uapi/asm/eeh.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/eeh.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index a52db28ecc1e..7fd7b5429e5a 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -27,6 +27,8 @@ #include #include +#include + struct pci_dev; struct pci_bus; struct pci_dn; @@ -185,11 +187,6 @@ enum { #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ -#define EEH_PE_STATE_NORMAL 0 /* Normal state */ -#define EEH_PE_STATE_RESET 1 /* PE reset asserted */ -#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */ -#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA, Enabled IO */ -#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ #define EEH_RESET_HOT 1 /* Hot reset */ #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h new file mode 100644 index 000000000000..8bb34b004995 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/eeh.h @@ -0,0 +1,30 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2015 + * + * Authors: Gavin Shan + */ + +#ifndef _ASM_POWERPC_EEH_H +#define _ASM_POWERPC_EEH_H + +/* PE states */ +#define EEH_PE_STATE_NORMAL 0 /* Normal state */ +#define EEH_PE_STATE_RESET 1 /* PE reset asserted */ +#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */ +#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ + +#endif /* _ASM_POWERPC_EEH_H */ -- cgit v1.2.3 From ec33d36e5ab5d52d59a8f696f7efb682bfc58494 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:08 +1100 Subject: powerpc/eeh: Introduce eeh_pe_inject_err() The patch defines PCI error types and functions in uapi/asm/eeh.h and exports function eeh_pe_inject_err(), which will be called by VFIO driver to inject the specified PCI error to the indicated PE for testing purpose. 
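A hypothetical in-kernel caller, such as the VFIO error injection path mentioned above, would be expected to use the new export roughly as follows (pe, addr and mask are supplied by the caller; the type and function constants come from the uapi header added below):

	int ret;

	ret = eeh_pe_inject_err(pe, EEH_ERR_TYPE_32,
				EEH_ERR_FUNC_LD_MEM_ADDR, addr, mask);
	if (ret)
		return ret;	/* -ENODEV, -ENOENT or -EINVAL, per the checks below */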
Signed-off-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 2 ++ arch/powerpc/include/uapi/asm/eeh.h | 26 ++++++++++++++++++++++++++ arch/powerpc/kernel/eeh.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 7fd7b5429e5a..c5eb86f3d452 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -291,6 +291,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option); int eeh_pe_get_state(struct eeh_pe *pe); int eeh_pe_reset(struct eeh_pe *pe, int option); int eeh_pe_configure(struct eeh_pe *pe); +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h index 8bb34b004995..291b7d1814a6 100644 --- a/arch/powerpc/include/uapi/asm/eeh.h +++ b/arch/powerpc/include/uapi/asm/eeh.h @@ -27,4 +27,30 @@ #define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ #define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */ +/* EEH error types and functions */ +#define EEH_ERR_TYPE_32 0 /* 32-bits error */ +#define EEH_ERR_TYPE_64 1 /* 64-bits error */ +#define EEH_ERR_FUNC_MIN 0 +#define EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */ +#define EEH_ERR_FUNC_LD_MEM_DATA 1 +#define EEH_ERR_FUNC_LD_IO_ADDR 2 /* IO load */ +#define EEH_ERR_FUNC_LD_IO_DATA 3 +#define EEH_ERR_FUNC_LD_CFG_ADDR 4 /* Config load */ +#define EEH_ERR_FUNC_LD_CFG_DATA 5 +#define EEH_ERR_FUNC_ST_MEM_ADDR 6 /* Memory store */ +#define EEH_ERR_FUNC_ST_MEM_DATA 7 +#define EEH_ERR_FUNC_ST_IO_ADDR 8 /* IO store */ +#define EEH_ERR_FUNC_ST_IO_DATA 9 +#define EEH_ERR_FUNC_ST_CFG_ADDR 10 /* Config store */ +#define EEH_ERR_FUNC_ST_CFG_DATA 11 +#define EEH_ERR_FUNC_DMA_RD_ADDR 12 /* DMA read */ +#define EEH_ERR_FUNC_DMA_RD_DATA 13 +#define EEH_ERR_FUNC_DMA_RD_MASTER 14 +#define EEH_ERR_FUNC_DMA_RD_TARGET 15 +#define EEH_ERR_FUNC_DMA_WR_ADDR 16 /* DMA write */ +#define EEH_ERR_FUNC_DMA_WR_DATA 17 +#define EEH_ERR_FUNC_DMA_WR_MASTER 18 +#define EEH_ERR_FUNC_DMA_WR_TARGET 19 +#define EEH_ERR_FUNC_MAX 19 + #endif /* _ASM_POWERPC_EEH_H */ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9ee61d15653d..04b5d94973dc 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1647,6 +1647,41 @@ int eeh_pe_configure(struct eeh_pe *pe) } EXPORT_SYMBOL_GPL(eeh_pe_configure); +/** + * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @function: error function + * @addr: address + * @mask: address mask + * + * The routine is called to inject the specified PCI error, which + * is determined by @type and @function, to the indicated PE for + * testing purpose. + */ +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + /* Invalid PE ? */ + if (!pe) + return -ENODEV; + + /* Unsupported operation ? 
*/ + if (!eeh_ops || !eeh_ops->err_inject) + return -ENOENT; + + /* Check on PCI error type */ + if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) + return -EINVAL; + + /* Check on PCI error function */ + if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) + return -EINVAL; + + return eeh_ops->err_inject(pe, type, func, addr, mask); +} +EXPORT_SYMBOL_GPL(eeh_pe_inject_err); + static int proc_eeh_show(struct seq_file *m, void *v) { if (!eeh_enabled()) { -- cgit v1.2.3 From 3721352990ba5eafa7e92a1c9cf86af3a46de151 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:09 +0800 Subject: powerpc/eeh: fix start/end/flags type in struct pci_io_addr_range{} struct pci_io_addr_range{} stores the information of pci resources. It would be better to keep these related fields have the same type as in struct resource{}. This patch fixes the start/end/flags type in struct pci_io_addr_range{} to have the same type as in struct resource{}. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_cache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index eeabeabea49c..a1e86e172e3c 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -48,11 +48,11 @@ */ struct pci_io_addr_range { struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; + resource_size_t addr_lo; + resource_size_t addr_hi; struct eeh_dev *edev; struct pci_dev *pcidev; - unsigned int flags; + unsigned long flags; }; static struct pci_io_addr_cache { @@ -125,8 +125,8 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) /* Insert address range into the rb tree. */ static struct pci_io_addr_range * -eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) +eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, + resource_size_t ahi, unsigned long flags) { struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; struct rb_node *parent = NULL; @@ -197,9 +197,9 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) /* Walk resources on this device, poke them into the tree */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); + resource_size_t start = pci_resource_start(dev,i); + resource_size_t end = pci_resource_end(dev,i); + unsigned long flags = pci_resource_flags(dev,i); /* We are interested only bus addresses, not dma or other stuff */ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) -- cgit v1.2.3 From 2ac3990cc36b1e42feca733b25254fb6dae15431 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:10 +0800 Subject: powerpc/eeh: fix comment for wait_state() To retrieve the PCI slot state, EEH driver would set a timeout for that. While current comment is not aligned to what the code does. This patch fixes those comments according to the code. 
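The unit matters because existing callers already pass milliseconds; for instance the recovery path in eeh_driver.c (visible in the hunk below) does:

	/*
	 * MAX_WAIT_FOR_RECOVERY appears to be expressed in seconds, hence
	 * the *1000 -- an inference from the "over 300 seconds" comment.
	 */
	rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);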
Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- arch/powerpc/platforms/pseries/eeh_pseries.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 24768ff3cb73..89eb4bc34d3a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -660,7 +660,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, - * sometimes over 3 seconds for certain systems. + * sometimes over 300 seconds for certain systems. */ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index ce738ab3d5a9..d0254710595d 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -979,7 +979,7 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) /** * pnv_eeh_wait_state - Wait for PE state * @pe: EEH PE - * @max_wait: maximal period in microsecond + * @max_wait: maximal period in millisecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 2039397cc75d..1ba55d0bb449 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -519,7 +519,7 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) /** * pseries_eeh_wait_state - Wait for PE state * @pe: EEH PE - * @max_wait: maximal period in microsecond + * @max_wait: maximal period in millisecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. -- cgit v1.2.3 From e17866d5593dc6ea7bff9ee56751fae6c3f56223 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:11 +0800 Subject: powerpc/eeh: fix powernv_eeh_wait_state delay logic As the comment indicates, powernv_eeh_get_state() will inform EEH core to delay 1 second. This means the delay doesn't happen when powernv_eeh_get_state() returns. This patch moves the delay subtraction just before msleep(), which is the same logic in pseries_eeh_wait_state(). Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index d0254710595d..622f08cf54b6 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1000,13 +1000,13 @@ static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) if (ret != EEH_STATE_UNAVAILABLE) return ret; - max_wait -= mwait; if (max_wait <= 0) { pr_warn("%s: Timeout getting PE#%x's state (%d)\n", __func__, pe->addr, max_wait); return EEH_STATE_NOT_SUPPORT; } + max_wait -= mwait; msleep(mwait); } -- cgit v1.2.3 From f77ceb717d0955f1df20601683ea55675101bab6 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 27 Apr 2015 09:25:12 +0800 Subject: powerpc/eeh: remove unused macro IS_BRIDGE Currently, the macro IS_BRIDGE is not used any where. 
This patch just removes it. Signed-off-by: Wei Yang Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 04b5d94973dc..23e0aa773643 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -144,8 +144,6 @@ struct eeh_stats { static struct eeh_stats eeh_stats; -#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) - static int __init eeh_setup(char *str) { if (!strcmp(str, "off")) -- cgit v1.2.3 From a22e5f579b98f16e24b7184d01c35de26eb5a7f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 May 2015 10:54:25 +0200 Subject: arch: Remove __ARCH_HAVE_CMPXCHG We removed the only user of this define in the rtmutex code. Get rid of it. Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior --- arch/alpha/include/asm/cmpxchg.h | 2 -- arch/avr32/include/asm/cmpxchg.h | 2 -- arch/hexagon/include/asm/cmpxchg.h | 1 - arch/ia64/include/uapi/asm/cmpxchg.h | 2 -- arch/m32r/include/asm/cmpxchg.h | 2 -- arch/m68k/include/asm/cmpxchg.h | 1 - arch/metag/include/asm/cmpxchg.h | 2 -- arch/mips/include/asm/cmpxchg.h | 2 -- arch/parisc/include/asm/cmpxchg.h | 2 -- arch/powerpc/include/asm/cmpxchg.h | 1 - arch/s390/include/asm/cmpxchg.h | 2 -- arch/score/include/asm/cmpxchg.h | 2 -- arch/sh/include/asm/cmpxchg.h | 2 -- arch/sparc/include/asm/cmpxchg_32.h | 1 - arch/sparc/include/asm/cmpxchg_64.h | 2 -- arch/tile/include/asm/atomic_64.h | 3 --- arch/x86/include/asm/cmpxchg.h | 2 -- 17 files changed, 31 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h index 429e8cd0d78e..e5117766529e 100644 --- a/arch/alpha/include/asm/cmpxchg.h +++ b/arch/alpha/include/asm/cmpxchg.h @@ -66,6 +66,4 @@ #undef __ASM__MB #undef ____cmpxchg -#define __HAVE_ARCH_CMPXCHG 1 - #endif /* _ALPHA_CMPXCHG_H */ diff --git a/arch/avr32/include/asm/cmpxchg.h b/arch/avr32/include/asm/cmpxchg.h index 962a6aeab787..366bbeaeb405 100644 --- a/arch/avr32/include/asm/cmpxchg.h +++ b/arch/avr32/include/asm/cmpxchg.h @@ -70,8 +70,6 @@ extern unsigned long __cmpxchg_u64_unsupported_on_32bit_kernels( if something tries to do an invalid cmpxchg(). */ extern void __cmpxchg_called_with_bad_pointer(void); -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) { diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 9e7802911a57..a6e34e2acbba 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -64,7 +64,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, * looks just like atomic_cmpxchg on our arch currently with a bunch of * variable casting. */ -#define __HAVE_ARCH_CMPXCHG 1 #define cmpxchg(ptr, old, new) \ ({ \ diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h index f35109b1d907..a0e3620f8f13 100644 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -61,8 +61,6 @@ extern void ia64_xchg_called_with_bad_pointer(void); * indicated by comparing RETURN with OLD. */ -#define __HAVE_ARCH_CMPXCHG 1 - /* * This function doesn't exist, so you'll get a linker error * if something tries to do an invalid cmpxchg(). 
diff --git a/arch/m32r/include/asm/cmpxchg.h b/arch/m32r/include/asm/cmpxchg.h index de651db20b43..14bf9b739dd2 100644 --- a/arch/m32r/include/asm/cmpxchg.h +++ b/arch/m32r/include/asm/cmpxchg.h @@ -107,8 +107,6 @@ __xchg_local(unsigned long x, volatile void *ptr, int size) ((__typeof__(*(ptr)))__xchg_local((unsigned long)(x), (ptr), \ sizeof(*(ptr)))) -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new) { diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index bc755bc620ad..83b1df80f0ac 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -90,7 +90,6 @@ extern unsigned long __invalid_cmpxchg_size(volatile void *, * indicated by comparing RETURN with OLD. */ #ifdef CONFIG_RMW_INSNS -#define __HAVE_ARCH_CMPXCHG 1 static inline unsigned long __cmpxchg(volatile void *p, unsigned long old, unsigned long new, int size) diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h index b1bc1be8540f..be29e3e44321 100644 --- a/arch/metag/include/asm/cmpxchg.h +++ b/arch/metag/include/asm/cmpxchg.h @@ -51,8 +51,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return old; } -#define __HAVE_ARCH_CMPXCHG 1 - #define cmpxchg(ptr, o, n) \ ({ \ __typeof__(*(ptr)) _o_ = (o); \ diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 412f945f1f5e..b71ab4a5fd50 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -138,8 +138,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ }) -#define __HAVE_ARCH_CMPXCHG 1 - #define __cmpxchg_asm(ld, st, m, old, new) \ ({ \ __typeof(*(m)) __ret; \ diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index dbd13354ec41..0a90b965cccb 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -46,8 +46,6 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) #define xchg(ptr, x) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) -#define __HAVE_ARCH_CMPXCHG 1 - /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index d463c68fe7f0..ad6263cffb0f 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -144,7 +144,6 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) * Compare and exchange - if *p == old, set it to new, * and return the old value of *p. 
*/ -#define __HAVE_ARCH_CMPXCHG 1 static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 4eadec466b8c..411464f4c97a 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -32,8 +32,6 @@ __old; \ }) -#define __HAVE_ARCH_CMPXCHG - #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ ({ \ register __typeof__(*(p1)) __old1 asm("2") = (o1); \ diff --git a/arch/score/include/asm/cmpxchg.h b/arch/score/include/asm/cmpxchg.h index f384839c3ee5..cc3f6420b71c 100644 --- a/arch/score/include/asm/cmpxchg.h +++ b/arch/score/include/asm/cmpxchg.h @@ -42,8 +42,6 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m, (unsigned long)(o), \ (unsigned long)(n))) -#define __HAVE_ARCH_CMPXCHG 1 - #include #endif /* _ASM_SCORE_CMPXCHG_H */ diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h index f6bd1406b897..85c97b188d71 100644 --- a/arch/sh/include/asm/cmpxchg.h +++ b/arch/sh/include/asm/cmpxchg.h @@ -46,8 +46,6 @@ extern void __xchg_called_with_bad_pointer(void); * if something tries to do an invalid cmpxchg(). */ extern void __cmpxchg_called_with_bad_pointer(void); -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, unsigned long new, int size) { diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h index d38b52dca216..83ffb83c5397 100644 --- a/arch/sparc/include/asm/cmpxchg_32.h +++ b/arch/sparc/include/asm/cmpxchg_32.h @@ -34,7 +34,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int * * Cribbed from */ -#define __HAVE_ARCH_CMPXCHG 1 /* bug catcher for when unsupported size is used - won't link */ void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index 0e1ed6cfbf68..faa2f61058c2 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -65,8 +65,6 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, #include -#define __HAVE_ARCH_CMPXCHG 1 - static inline unsigned long __cmpxchg_u32(volatile int *m, int old, int new) { diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 7b11c5fadd42..0496970cef82 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) -/* Define this to indicate that cmpxchg is an efficient operation. */ -#define __HAVE_ARCH_CMPXCHG - #endif /* !__ASSEMBLY__ */ #endif /* _ASM_TILE_ATOMIC_64_H */ diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 99c105d78b7e..ad19841eddfe 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -4,8 +4,6 @@ #include #include /* Provides LOCK_PREFIX */ -#define __HAVE_ARCH_CMPXCHG 1 - /* * Non-existant functions to indicate usage errors at link time * (or compile-time if the compiler implements __compiletime_error(). -- cgit v1.2.3 From a401521321925c02733515cd7f9f13ae4597a743 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Sun, 17 May 2015 12:54:14 +0200 Subject: crypto: powerpc/md5 - use md5 IV MD5_HX instead of their raw value Since MD5 IV are now available in crypto/md5.h, use them. 
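For reference, the named IVs in crypto/md5.h carry exactly the raw words being replaced; their definitions look roughly like this (the exact spelling and suffix should be checked against the header):

	#define MD5_H0	0x67452301UL
	#define MD5_H1	0xefcdab89UL
	#define MD5_H2	0x98badcfeUL
	#define MD5_H3	0x10325476UL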
Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu --- arch/powerpc/crypto/md5-glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c index 452fb4dc575f..92289679b4c4 100644 --- a/arch/powerpc/crypto/md5-glue.c +++ b/arch/powerpc/crypto/md5-glue.c @@ -37,10 +37,10 @@ static int ppc_md5_init(struct shash_desc *desc) { struct md5_state *sctx = shash_desc_ctx(desc); - sctx->hash[0] = 0x67452301; - sctx->hash[1] = 0xefcdab89; - sctx->hash[2] = 0x98badcfe; - sctx->hash[3] = 0x10325476; + sctx->hash[0] = MD5_H0; + sctx->hash[1] = MD5_H1; + sctx->hash[2] = MD5_H2; + sctx->hash[3] = MD5_H3; sctx->byte_count = 0; return 0; -- cgit v1.2.3 From d4d4add9eacf0a3e642228a22df722f00921a184 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 22 Apr 2015 15:36:50 +1000 Subject: powerpc: Little endian should depend on PPC_BOOK3S_64 The only little endian configuration we support is ppc64le, all other configurations are big endian. So we should only offer a choice of endian if we're building for 64-bit Book3S, ie. PPC_BOOK3S_64. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/Kconfig.cputype | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 724ecc791404..c140e94c7c72 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -431,6 +431,7 @@ config CPU_BIG_ENDIAN config CPU_LITTLE_ENDIAN bool "Build little endian kernel" + depends on PPC_BOOK3S_64 select PPC64_BOOT_WRAPPER help Build a little endian kernel. -- cgit v1.2.3 From ab3f02fc237211f0583c1e7ba3bf504747be9b8d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:52:27 +0200 Subject: locking/arch: Add WRITE_ONCE() to set_mb() Since we assume set_mb() to result in a single store followed by a full memory barrier, employ WRITE_ONCE(). 
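The point of WRITE_ONCE() is that a plain assignment leaves the compiler free to tear or merge the store; wrapping it forces a single access of the variable's full width before the barrier. In the asm-generic fallback the change amounts to:

	/* before: plain store, which the compiler may split into several accesses */
	#define set_mb(var, value) do { (var) = (value); mb(); } while (0)

	/* after: WRITE_ONCE() pins it to one store of the whole variable */
	#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0)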
Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/barrier.h | 2 +- arch/arm64/include/asm/barrier.h | 2 +- arch/ia64/include/asm/barrier.h | 2 +- arch/metag/include/asm/barrier.h | 2 +- arch/mips/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- arch/sparc/include/asm/barrier_64.h | 2 +- arch/x86/include/asm/barrier.h | 2 +- arch/x86/um/asm/barrier.h | 2 +- include/asm-generic/barrier.h | 2 +- include/linux/compiler.h | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index d2f81e6b8c1c..993150aea681 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 71f19c4dc0de..ff7de78d01b8 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index f6769eb2bbf9..03117e7b2ab8 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -82,7 +82,7 @@ do { \ * acquire vs release semantics but we can't discuss this stuff with * Linus just yet. Grrr... 
*/ -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index d703d8e26a65..97eb018a2933 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index 2b8bbbcb9be0..cff1bbdaa74a 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -113,7 +113,7 @@ #endif #define set_mb(var, value) \ - do { var = value; smp_mb(); } while (0) + do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index a3bf5be111ff..2a072e48780d 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 8d724718ec21..b66cd53d35fc 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 76648941fea7..125fec7512f4 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -41,7 +41,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_wmb() wmb() #define set_mb(__var, __value) \ - do { __var = __value; membar_safe("#StoreLoad"); } while(0) + do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP #define smp_mb() mb() diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 959e45b81fe2..9de5cde133a1 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -40,7 +40,7 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7e8a1a650435..cc0cb01f346d 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,7 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { var = value; barrier(); } while (0) +#define 
set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index f5c40b0fadc2..3938716b44d7 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -67,7 +67,7 @@ #endif #ifndef set_mb -#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a7c0941d10da..03e227ba481c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -250,7 +250,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #define WRITE_ONCE(x, val) \ - ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; }) + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) #endif /* __KERNEL__ */ -- cgit v1.2.3 From b92b8b35a2e38bde319fd1d68ec84628c1f1b0fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 May 2015 10:51:55 +0200 Subject: locking/arch: Rename set_mb() to smp_store_mb() Since set_mb() is really about an smp_mb() -- not a IO/DMA barrier like mb() rename it to match the recent smp_load_acquire() and smp_store_release(). Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt | 6 +++--- arch/arm/include/asm/barrier.h | 2 +- arch/arm64/include/asm/barrier.h | 2 +- arch/ia64/include/asm/barrier.h | 7 +------ arch/metag/include/asm/barrier.h | 2 +- arch/mips/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- arch/sh/include/asm/barrier.h | 2 +- arch/sparc/include/asm/barrier_64.h | 2 +- arch/x86/include/asm/barrier.h | 4 ++-- arch/x86/um/asm/barrier.h | 3 ++- fs/select.c | 6 +++--- include/asm-generic/barrier.h | 4 ++-- include/linux/sched.h | 8 ++++---- kernel/futex.c | 2 +- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/sched/wait.c | 4 ++-- 18 files changed, 29 insertions(+), 33 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f95746189b5d..fe4020e4b468 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1662,7 +1662,7 @@ CPU from reordering them. There are some more advanced barrier functions: - (*) set_mb(var, value) + (*) smp_store_mb(var, value) This assigns the value to the variable and then inserts a full memory barrier after it, depending on the function. 
It isn't guaranteed to @@ -1975,7 +1975,7 @@ after it has altered the task state: CPU 1 =============================== set_current_state(); - set_mb(); + smp_store_mb(); STORE current->state LOAD event_indicated @@ -2016,7 +2016,7 @@ between the STORE to indicate the event and the STORE to set TASK_RUNNING: CPU 1 CPU 2 =============================== =============================== set_current_state(); STORE event_indicated - set_mb(); wake_up(); + smp_store_mb(); wake_up(); STORE current->state STORE current->state LOAD event_indicated diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 993150aea681..6c2327e1c732 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index ff7de78d01b8..0fa47c4275cb 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -114,7 +114,7 @@ do { \ #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index 03117e7b2ab8..843ba435e43b 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -77,12 +77,7 @@ do { \ ___p1; \ }) -/* - * XXX check on this ---I suspect what Linus really wants here is - * acquire vs release semantics but we can't discuss this stuff with - * Linus just yet. Grrr... 
- */ -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index 97eb018a2933..5a696e507930 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -84,7 +84,7 @@ static inline void fence(void) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index cff1bbdaa74a..7ecba84656d4 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h @@ -112,7 +112,7 @@ #define __WEAK_LLSC_MB " \n" #endif -#define set_mb(var, value) \ +#define smp_store_mb(var, value) \ do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 2a072e48780d..39505d660a70 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index b66cd53d35fc..e6f8615a11eb 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h index 43715308b068..bf91037db4e0 100644 --- a/arch/sh/include/asm/barrier.h +++ b/arch/sh/include/asm/barrier.h @@ -32,7 +32,7 @@ #define ctrl_barrier() __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop") #endif -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #include diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h index 125fec7512f4..809941e33e12 100644 --- a/arch/sparc/include/asm/barrier_64.h +++ b/arch/sparc/include/asm/barrier_64.h @@ -40,7 +40,7 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define dma_rmb() rmb() #define dma_wmb() wmb() -#define set_mb(__var, __value) \ +#define smp_store_mb(__var, __value) \ do { WRITE_ONCE(__var, __value); membar_safe("#StoreLoad"); } while(0) #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 9de5cde133a1..e51a8f803f55 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -35,12 +35,12 @@ #define smp_mb() mb() #define smp_rmb() dma_rmb() #define smp_wmb() barrier() -#define set_mb(var, value) do { (void)xchg(&var, value); } 
while (0) +#define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #endif /* SMP */ #define read_barrier_depends() do { } while (0) diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index cc0cb01f346d..b9531d343134 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -39,7 +39,8 @@ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() -#define set_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) + +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); barrier(); } while (0) #define read_barrier_depends() do { } while (0) #define smp_read_barrier_depends() do { } while (0) diff --git a/fs/select.c b/fs/select.c index f684c750e08a..015547330e88 100644 --- a/fs/select.c +++ b/fs/select.c @@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expect write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in poll_schedule_timeout. + * and is paired with smp_store_mb() in poll_schedule_timeout. */ smp_wmb(); pwq->triggered = 1; @@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, /* * Prepare for the next iteration. * - * The following set_mb() serves two purposes. First, it's + * The following smp_store_mb() serves two purposes. First, it's * the counterpart rmb of the wmb in pollwake() such that data * written before wake up is always visible after wake up. * Second, the full barrier guarantees that triggered clearing @@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, * this problem doesn't exist for the first iteration as * add_wait_queue() has full barrier semantics. 
*/ - set_mb(pwq->triggered, 0); + smp_store_mb(pwq->triggered, 0); return rc; } diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 3938716b44d7..e6a83d712ef6 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -66,8 +66,8 @@ #define smp_read_barrier_depends() do { } while (0) #endif -#ifndef set_mb -#define set_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#ifndef smp_store_mb +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) #endif #ifndef smp_mb__before_atomic diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a2e6122734..18f197223ebd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -252,7 +252,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - set_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) /* @@ -274,7 +274,7 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - set_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else @@ -282,7 +282,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ - set_mb((tsk)->state, (state_value)) + smp_store_mb((tsk)->state, (state_value)) /* * set_current_state() includes a barrier so that the write of current->state @@ -298,7 +298,7 @@ extern char ___assert_task_state[1 - 2*!!( #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) #define set_current_state(state_value) \ - set_mb(current->state, (state_value)) + smp_store_mb(current->state, (state_value)) #endif diff --git a/kernel/futex.c b/kernel/futex.c index 2579e407ff67..55ca63ad9622 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2055,7 +2055,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, { /* * The task state is guaranteed to be set before another task can - * wake it. set_current_state() is implemented using set_mb() and + * wake it. set_current_state() is implemented using smp_store_mb() and * queue_me() calls spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. */ diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 27ab96dca68c..04ab18151cc8 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -175,7 +175,7 @@ static void pv_wait_node(struct mcs_spinlock *node) * * Matches the xchg() from pv_kick_node(). */ - set_mb(pn->state, vcpu_halted); + smp_store_mb(pn->state, vcpu_halted); if (!READ_ONCE(node->locked)) pv_wait(&pn->state, vcpu_halted); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 852143a79f36..9bc82329eaad 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -341,7 +341,7 @@ long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss * an event. 
*/ - set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ + smp_store_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ return timeout; } @@ -354,7 +354,7 @@ int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expects write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in wait_woken(). + * and is paired with smp_store_mb() in wait_woken(). */ smp_wmb(); /* C */ wait->flags |= WQ_FLAG_WOKEN; -- cgit v1.2.3 From 2cb7c9cb426660b5ed58b643d9e7dd5d50ba901f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:09 +0200 Subject: sched/preempt, mm/kmap: Explicitly disable/enable preemption in kmap_atomic_* The existing code relies on pagefault_disable() implicitly disabling preemption, so that no schedule will happen between kmap_atomic() and kunmap_atomic(). Let's make this explicit, to prepare for pagefault_disable() not touching preemption anymore. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-5-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/arm/mm/highmem.c | 3 +++ arch/frv/mm/highmem.c | 2 ++ arch/metag/mm/highmem.c | 4 +++- arch/microblaze/mm/highmem.c | 4 +++- arch/mips/mm/highmem.c | 5 ++++- arch/mn10300/include/asm/highmem.h | 3 +++ arch/parisc/include/asm/cacheflush.h | 2 ++ arch/powerpc/mm/highmem.c | 4 +++- arch/sparc/mm/highmem.c | 4 +++- arch/tile/mm/highmem.c | 3 ++- arch/x86/mm/highmem_32.c | 3 ++- arch/x86/mm/iomap_32.c | 2 ++ arch/xtensa/mm/highmem.c | 2 ++ include/linux/highmem.h | 2 ++ include/linux/io-mapping.h | 2 ++ 15 files changed, 38 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index b98895d9fe57..ee8dfa793989 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page) void *kmap; int type; + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr) kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn) int idx, type; struct page *page = pfn_to_page(pfn); + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index bed9a9bd3c10..785344bbdc07 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) unsigned long paddr; int type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); paddr = page_to_phys(page); @@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr) } kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } 
EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c index d71f621a2c0b..807f1b1c4e65 100644 --- a/arch/metag/mm/highmem.c +++ b/arch/metag/mm/highmem.c @@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; int type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn) unsigned long vaddr; int type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c index 5a92576fad92..2fcc5a52d84d 100644 --- a/arch/microblaze/mm/highmem.c +++ b/arch/microblaze/mm/highmem.c @@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr) #endif kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index da815d295239..11661cbc11a8 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr) #endif kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index 2fbbe4d920aa..1ddea5afba09 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h @@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); if (page < highmem_start_page) return page_address(page); @@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) if (vaddr < FIXADDR_START) { /* FIXME */ pagefault_enable(); + preempt_enable(); return; } @@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } #endif /* __KERNEL__ */ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index de65f66ea64e..ec2df4bab302 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -142,6 +142,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { + preempt_disable(); pagefault_disable(); return page_address(page); } @@ -150,6 +151,7 @@ 
static inline void __kunmap_atomic(void *addr) { flush_kernel_dcache_page_addr(addr); pagefault_enable(); + preempt_enable(); } #define kmap_atomic_prot(page, prot) kmap_atomic(page) diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index e7450bdbe83a..e292c8a60952 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 449f864f0cef..a454ec5ff07a 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; long idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 6aa2f2625447..fcd545014e79 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) int idx, type; pte_t *pte; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); /* Avoid icache flushes by disallowing atomic executable mappings. 
*/ @@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 4500142bc4aa..eecb207a2037 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) @@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr) #endif pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..2b7ece0e103a 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); @@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c index 8cfb71ec0937..184ceadccc1a 100644 --- a/arch/xtensa/mm/highmem.c +++ b/arch/xtensa/mm/highmem.c @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 9286a46b7d69..6aefcd0031a6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -65,6 +65,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { + preempt_disable(); pagefault_disable(); return page_address(page); } @@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct page *page) static inline void __kunmap_atomic(void *addr) { pagefault_enable(); + preempt_enable(); } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 657fab4efab3..c27dde7215b5 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -141,6 +141,7 @@ static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { + preempt_disable(); pagefault_disable(); return ((char __force __iomem *) mapping) + offset; } @@ -149,6 +150,7 @@ static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { pagefault_enable(); + preempt_enable(); } /* Non-atomic map/unmap */ -- cgit v1.2.3 From 70ffdb9393a7264a069265edded729078dcf0425 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:11 +0200 Subject: mm/fault, arch: Use pagefault_disable() to check for disabled pagefaults in the handler Introduce faulthandler_disabled() and use it to check for irq context and disabled pagefaults (via pagefault_disable()) in the pagefault handlers. Please note that we keep the in_atomic() checks in place - to detect whether in irq context (in which case preemption is always properly disabled). In contrast, preempt_disable() should never be used to disable pagefaults. With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt counter, and therefore the result of in_atomic() differs. 
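For reference while reading the per-arch conversions below (illustration only, quoted from the include/linux/uaccess.h hunk and the arch fault-handler hunks in this patch, not an extra change):

/* New helper introduced in include/linux/uaccess.h by this patch: true when
 * pagefaults were disabled via pagefault_disable() or when in irq context.
 */
#define faulthandler_disabled() (pagefault_disabled() || in_atomic())

/* Typical converted check at the top of an arch page-fault handler: */
	if (faulthandler_disabled() || !mm)
		goto no_context;
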
We validate that condition by using might_fault() checks when calling might_sleep(). Therefore, add a comment to faulthandler_disabled(), describing why this is needed. faulthandler_disabled() and pagefault_disable() are defined in linux/uaccess.h, so let's properly add that include to all relevant files. This patch is based on a patch from Thomas Gleixner. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/alpha/mm/fault.c | 5 ++--- arch/arc/mm/fault.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/avr32/mm/fault.c | 4 ++-- arch/cris/mm/fault.c | 6 +++--- arch/frv/mm/fault.c | 4 ++-- arch/ia64/mm/fault.c | 4 ++-- arch/m32r/mm/fault.c | 8 ++++---- arch/m68k/mm/fault.c | 4 ++-- arch/metag/mm/fault.c | 2 +- arch/microblaze/mm/fault.c | 8 ++++---- arch/mips/mm/fault.c | 4 ++-- arch/mn10300/mm/fault.c | 4 ++-- arch/nios2/mm/fault.c | 2 +- arch/parisc/kernel/traps.c | 4 ++-- arch/parisc/mm/fault.c | 4 ++-- arch/powerpc/mm/fault.c | 9 +++++---- arch/s390/mm/fault.c | 2 +- arch/score/mm/fault.c | 3 ++- arch/sh/mm/fault.c | 5 +++-- arch/sparc/mm/fault_32.c | 4 ++-- arch/sparc/mm/fault_64.c | 4 ++-- arch/sparc/mm/init_64.c | 2 +- arch/tile/mm/fault.c | 4 ++-- arch/um/kernel/trap.c | 4 ++-- arch/unicore32/mm/fault.c | 2 +- arch/x86/mm/fault.c | 5 +++-- arch/xtensa/mm/fault.c | 4 ++-- include/linux/uaccess.h | 12 ++++++++++++ 30 files changed, 72 insertions(+), 57 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 9d0ac091a52a..4a905bd667e2 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -23,8 +23,7 @@ #include #include #include - -#include +#include extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* If we're in an interrupt context, or have no user context, we must not take the fault. */ - if (!mm || in_atomic()) + if (!mm || faulthandler_disabled()) goto no_context; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 6a2e006cbcce..d948e4e9d89c 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6333d9c17875..0d629b8f973f 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 96da13167d4a..0948d327d013 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * If we're in an interrupt or have no user context, we must not take * the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index d223a8b57c1e..c03533937a9f 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -14,11 +14,11 @@ #include #include #include +#include #include #include #include -#include #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, int trap) @@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) * If we're in an interrupt or have no user context, we must * not take the fault... */ - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM)) goto no_context; local_irq_enable(); diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 83f12f2ed9e3..3066d40a6db1 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include extern int find_fixup_code(struct pt_regs *); @@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs, info.si_code = SEGV_MAPERR; /* - * If we're in an interrupt or "atomic" operation or have no + * If we're in an interrupt, have pagefaults disabled or have no * user context, we must not take the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index ec4917ddf678..61d99767fe16 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include #include /*****************************************************************************/ @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(__frame)) diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index ba5ba7accd0d..70b40d1205a6 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include -#include extern int die(char *, struct pt_regs *, long); @@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * If we're in an interrupt or have no user context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index e3d4d4890104..8f9875b7933d 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -24,9 +24,9 @@ #include /* For unblank_screen() */ #include #include +#include #include -#include #include #include #include @@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, mm = tsk->mm; /* - * If we're in an interrupt or have no user context or are running in an - * atomic region then we must not take the fault.. + * If we're in an interrupt or have no user context or have pagefaults + * disabled then we must not take the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (error_code & ACE_USERMODE) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index b2f04aee46ec..6a94cdd0c830 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -10,10 +10,10 @@ #include #include #include +#include #include #include -#include #include extern void die_if_kernel(char *, struct pt_regs *, long); @@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 2de5dc695a87..f57edca63609 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, mm = tsk->mm; - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index d46a5ebb7570..177dfc003643 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) is_write = 0; - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { if (kernel_mode(regs)) goto bad_area_nosemaphore; - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. */ - pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n", - mm); + pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n", + mm); pr_emerg("r15 = %lx MSR = %lx\n", regs->r15, regs->msr); die("Weird page fault", regs, SIGSEGV); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 7ff8637e530d..36c0f26fac6b 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include /* For VMALLOC_END */ #include @@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. 
*/ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 0c2cc5d39c8e..4a1d181ed32f 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -23,8 +23,8 @@ #include #include #include /* For unblank_screen() */ +#include -#include #include #include #include @@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 0c9b6afe69e9..b51878b0c6b8 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 47ee620d15d2..6548fd1d2e62 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -26,9 +26,9 @@ #include #include #include +#include #include -#include #include #include #include @@ -800,7 +800,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * unless pagefault_disable() was called before. */ - if (fault_space == 0 && !in_atomic()) + if (fault_space == 0 && !faulthandler_disabled()) { pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); parisc_terminate("Kernel Fault", regs, code, fault_address); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e5120e653240..15503adddf4f 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -15,8 +15,8 @@ #include #include #include +#include -#include #include /* Various important other fields */ @@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, int fault; unsigned int flags; - if (in_atomic()) + if (pagefault_disabled()) goto no_context; tsk = current; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b396868d2aa7..6d535973b200 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,13 +33,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (in_atomic() || mm == NULL) { + if (faulthandler_disabled() || mm == NULL) { if (!user_mode(regs)) { rc = SIGSEGV; goto bail; } - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " - "in_atomic() = %d mm = %p\n", in_atomic(), mm); + "faulthandler_disabled() = %d mm = %p\n", + faulthandler_disabled(), mm); printk(KERN_EMERG "NIP = %lx MSR = %lx\n", regs->nip, regs->msr); die("Weird page fault", regs, SIGSEGV); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 76515bcea2f1..4c8f5d7f9c23 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. 
*/ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 6860beb2a280..37a6c2e0e969 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -34,6 +34,7 @@ #include #include #include +#include /* * This routine handles page faults. It determines the address, @@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (pagefault_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index a58fec9b55e0..79d8276377d1 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, /* * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: + * with pagefaults disabled then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 70d817154fe8..c399e7b3b035 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include "mm_32.h" @@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (pagefault_disabled() || !mm) goto no_context; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 479823249429..e9268ea1a68d 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -22,12 +22,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto intr_or_no_mm; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ca0d6ba5ec8..cee9b77ddd05 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2706,7 +2706,7 @@ void hugetlb_setup(struct pt_regs *regs) struct mm_struct *mm = current->mm; struct tsb_config *tp; - if (in_atomic() || !mm) { + if (faulthandler_disabled() || !mm) { const struct exception_table_entry *entry; entry = search_exception_tables(regs->tpc); diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index e83cc999da02..3f4f58d34a92 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're in an interrupt, have no user context or are running in an - * atomic region then we must not take the fault. + * region with pagefaults disabled then we must not take the fault. 
*/ - if (in_atomic() || !mm) { + if (pagefault_disabled() || !mm) { vma = NULL; /* happy compiler */ goto bad_area_nosemaphore; } diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 8e4daf44e980..f9c9e5a6beba 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -35,10 +35,10 @@ int handle_page_fault(unsigned long address, unsigned long ip, *code_out = SEGV_MAPERR; /* - * If the fault was during atomic operation, don't take the fault, just + * If the fault was with pagefaults disabled, don't take the fault, just * fail. */ - if (in_atomic()) + if (faulthandler_disabled()) goto out_nosemaphore; if (is_user) diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 0dc922dba915..afccef5529cc 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 181c53bac3a7..9dc909841739 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,6 +13,7 @@ #include /* hstate_index_to_shift */ #include /* prefetchw */ #include /* exception_enter(), ... */ +#include /* faulthandler_disabled() */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ @@ -1126,9 +1127,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, /* * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: + * in a region with pagefaults disabled then we must not take the fault */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 9e3571a6535c..83a44a33cfa1 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -15,10 +15,10 @@ #include #include #include +#include #include #include #include -#include #include DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST; @@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs) /* If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) { + if (faulthandler_disabled() || !mm) { bad_page_fault(regs, address, SIGSEGV); return; } diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 23290cc93a24..90786d2d74e5 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -59,6 +59,18 @@ static inline void pagefault_enable(void) */ #define pagefault_disabled() (current->pagefault_disabled != 0) +/* + * The pagefault handler is in general disabled by pagefault_disable() or + * when in irq context (via in_atomic()). + * + * This function should only be used by the fault handlers. Other users should + * stick to pagefault_disabled(). + * Please NEVER use preempt_disable() to disable the fault handler. With + * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled. + * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT. 
+ */ +#define faulthandler_disabled() (pagefault_disabled() || in_atomic()) + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, -- cgit v1.2.3 From 5f76eea88dcbe75506d98e0207b9e3bd47941f2d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 11 May 2015 17:52:18 +0200 Subject: sched/preempt, powerpc: Disable preemption in enable_kernel_altivec() explicitly enable_kernel_altivec() has to be called with disabled preemption. Let's make this explicit, to prepare for pagefault_disable() not touching preemption anymore. Reviewed-and-tested-by: Thomas Gleixner Signed-off-by: David Hildenbrand Signed-off-by: Peter Zijlstra (Intel) Acked-by: Benjamin Herrenschmidt Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-14-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/powerpc/lib/vmx-helper.c | 11 ++++++----- drivers/crypto/vmx/aes.c | 8 +++++++- drivers/crypto/vmx/aes_cbc.c | 6 ++++++ drivers/crypto/vmx/ghash.c | 8 ++++++++ 4 files changed, 27 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 3cf529ceec5b..ac93a3bd2730 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -27,11 +27,11 @@ int enter_vmx_usercopy(void) if (in_interrupt()) return 0; - /* This acts as preempt_disable() as well and will make - * enable_kernel_altivec(). We need to disable page faults - * as they can call schedule and thus make us lose the VMX - * context. So on page faults, we just fail which will cause - * a fallback to the normal non-vmx copy. + preempt_disable(); + /* + * We need to disable page faults as they can call schedule and + * thus make us lose the VMX context. So on page faults, we just + * fail which will cause a fallback to the normal non-vmx copy. 
*/ pagefault_disable(); @@ -47,6 +47,7 @@ int enter_vmx_usercopy(void) int exit_vmx_usercopy(void) { pagefault_enable(); + preempt_enable(); return 0; } diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index ab300ea19434..a9064e36e7b5 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c @@ -78,12 +78,14 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, int ret; struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); pagefault_enable(); - + preempt_enable(); + ret += crypto_cipher_setkey(ctx->fallback, key, keylen); return ret; } @@ -95,10 +97,12 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) if (in_interrupt()) { crypto_cipher_encrypt_one(ctx->fallback, dst, src); } else { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); aes_p8_encrypt(src, dst, &ctx->enc_key); pagefault_enable(); + preempt_enable(); } } @@ -109,10 +113,12 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) if (in_interrupt()) { crypto_cipher_decrypt_one(ctx->fallback, dst, src); } else { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); aes_p8_decrypt(src, dst, &ctx->dec_key); pagefault_enable(); + preempt_enable(); } } diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 1a559b7dddb5..477284abdd11 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -79,11 +79,13 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, int ret; struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); pagefault_enable(); + preempt_enable(); ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); return ret; @@ -106,6 +108,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, if (in_interrupt()) { ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); } else { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); @@ -119,6 +122,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, } pagefault_enable(); + preempt_enable(); } return ret; @@ -141,6 +145,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, if (in_interrupt()) { ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); } else { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); @@ -154,6 +159,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, } pagefault_enable(); + preempt_enable(); } return ret; diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index d0ffe277af5c..f255ec4a04d4 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c @@ -114,11 +114,13 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, if (keylen != GHASH_KEY_LEN) return -EINVAL; + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); enable_kernel_fp(); gcm_init_p8(ctx->htable, (const u64 *) key); pagefault_enable(); + preempt_enable(); return crypto_shash_setkey(ctx->fallback, key, keylen); } @@ -140,23 +142,27 @@ static int p8_ghash_update(struct shash_desc *desc, } memcpy(dctx->buffer + dctx->bytes, src, GHASH_DIGEST_SIZE - dctx->bytes); + preempt_disable(); pagefault_disable(); 
enable_kernel_altivec(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); pagefault_enable(); + preempt_enable(); src += GHASH_DIGEST_SIZE - dctx->bytes; srclen -= GHASH_DIGEST_SIZE - dctx->bytes; dctx->bytes = 0; } len = srclen & ~(GHASH_DIGEST_SIZE - 1); if (len) { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, src, len); pagefault_enable(); + preempt_enable(); src += len; srclen -= len; } @@ -180,12 +186,14 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out) if (dctx->bytes) { for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) dctx->buffer[i] = 0; + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); enable_kernel_fp(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); pagefault_enable(); + preempt_enable(); dctx->bytes = 0; } memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); -- cgit v1.2.3 From c546d5db75b452737e554bb9c33dedb46847c4fd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2015 08:36:29 +0200 Subject: remove scatterlist.h generation from arch Kbuild files Signed-off-by: Christoph Hellwig Reported-by: Geert Uytterhoeven Signed-off-by: Jens Axboe --- arch/alpha/include/asm/Kbuild | 1 - arch/arc/include/asm/Kbuild | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/avr32/include/asm/Kbuild | 1 - arch/blackfin/include/asm/Kbuild | 1 - arch/c6x/include/asm/Kbuild | 1 - arch/cris/include/asm/Kbuild | 1 - arch/frv/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/Kbuild | 1 - arch/m32r/include/asm/Kbuild | 1 - arch/m68k/include/asm/Kbuild | 1 - arch/metag/include/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mn10300/include/asm/Kbuild | 1 - arch/nios2/include/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild | 1 - arch/score/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/tile/include/asm/Kbuild | 1 - arch/um/include/asm/Kbuild | 1 - arch/unicore32/include/asm/Kbuild | 1 - arch/x86/include/asm/Kbuild | 1 - arch/xtensa/include/asm/Kbuild | 1 - 30 files changed, 30 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 76aeb8fa551a..cde23cd03609 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -6,6 +6,5 @@ generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index be0c39e76f7c..769b312c1abb 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -33,7 +33,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sembuf.h generic-y += shmbuf.h generic-y += siginfo.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 3c4596d0ce6c..83c50193626c 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += scatterlist.h generic-y += seccomp.h generic-y += sections.h generic-y += segment.h diff 
--git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 55103e50c51b..b112a39834d0 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -35,7 +35,6 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 528d70d47a54..1d66afdfac07 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += mcs_spinlock.h generic-y += param.h generic-y += percpu.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 4bd3c3cfc9ab..07051a63415d 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -29,7 +29,6 @@ generic-y += percpu.h generic-y += pgalloc.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sembuf.h generic-y += serial.h generic-y += setup.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index ae0a51f5376c..7aeb32272975 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -38,7 +38,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += segment.h generic-y += sembuf.h generic-y += serial.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 057e51859b0a..d294f6aaff1d 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += mcs_spinlock.h generic-y += module.h generic-y += percpu.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index e3f81b53578e..30edce31e5c2 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -5,5 +5,4 @@ generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += trace_clock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index c7a99f860b40..5ade4a163558 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -37,7 +37,6 @@ generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 9b41b4bcc073..ccff13d33fa2 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -5,6 +5,5 @@ generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 2edc793372fc..ba1cdc018731 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -6,6 +6,5 @@ generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 
1517ed1c6471..1555bc189c7d 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -23,7 +23,6 @@ generic-y += mutex.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sections.h generic-y += shmparam.h generic-y += siginfo.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 0bf5d525b945..199320f3c345 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -33,7 +33,6 @@ generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += sembuf.h generic-y += serial.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index ab564a6db5c3..9989ddb169ca 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -7,6 +7,5 @@ generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += syscalls.h generic-y += trace_clock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 526539cbc99f..7fe5c61a3cb8 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -11,7 +11,6 @@ generic-y += mutex.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += serial.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index f892d9de47d9..de30b0c88796 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -6,6 +6,5 @@ generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 24b3d8999ac7..434639d510b3 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -40,7 +40,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 91f1f360a7c4..2a2e39b8109a 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -45,7 +45,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 7a4bcc36303d..12b341d04f88 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += param.h generic-y += percpu.h generic-y += poll.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += seccomp.h generic-y += segment.h generic-y += topology.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 4b87205c230c..050712e1ce41 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -6,6 +6,5 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h -generic-y += scatterlist.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 
c631f98fd524..dc5385ebb071 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -4,5 +4,4 @@ generic-y += clkdev.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += trace_clock.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 83ed116d414c..138fb3db45ba 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -8,7 +8,6 @@ generic-y += cputime.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += trace_clock.h generic-y += xor.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 654ebb6bd5d8..9ac4626e7284 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += percpu.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 94f36e7086a7..2b2a69dcc467 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += mcs_spinlock.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += serial.h generic-y += trace_clock.h generic-y += types.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index f5433e0e34e0..d53654488c2c 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -27,7 +27,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 9176fa11d49b..b7df3ae9be51 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += param.h generic-y += pci.h generic-y += percpu.h generic-y += preempt.h -generic-y += scatterlist.h generic-y += sections.h generic-y += switch_to.h generic-y += topology.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 3e0c19d0f4c5..d12b377b5a8b 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -36,7 +36,6 @@ generic-y += poll.h generic-y += posix_types.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index d55a210a49bf..4dd1f2d770af 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -9,4 +9,3 @@ generic-y += cputime.h generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h -generic-y += scatterlist.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 86a9ab2e2ca9..14d15bf1a95b 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -22,7 +22,6 @@ generic-y += mcs_spinlock.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h -generic-y += scatterlist.h generic-y += sections.h generic-y += siginfo.h generic-y += statfs.h -- cgit v1.2.3 From 7978f76c4495c194183e03f65406f78cdda37882 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Tue, 5 May 2015 17:30:21 +0200 Subject: powerpc: Enable sys_kcmp() for CRIU The 
commit 8170a83f15ee ("powerpc: Wireup the kcmp syscall to sys_ni") has disabled the kcmp syscall for powerpc. This has been done due to the use of unsigned long parameters which may require a dedicated wrapper to handle 32bit process on top of 64bit kernel. However in the kcmp() case, the 2 unsigned long parameters are currently only used to carry file descriptors from user space to the kernel. Since such a parameter is passed through register, and file descriptor doesn't need to get extended, there is, today, no need for a wrapper. In the case there will be a need to pass address in or out of this system call, then a wrapper could be required, it will then be to care of it. As today this is not the case, it is safe to enable kcmp() on powerpc. Tested (by Laurent) on 64-bit, 32-bit, and 32-bit userspace on 64-bit kernel using tools/testing/selftests/kcmp [mpe]. Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index f1863a138b4a..71f2b3f02cf8 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -358,7 +358,7 @@ SYSCALL_SPU(setns) COMPAT_SYS(process_vm_readv) COMPAT_SYS(process_vm_writev) SYSCALL(finit_module) -SYSCALL(ni_syscall) /* sys_kcmp */ +SYSCALL(kcmp) /* sys_kcmp */ SYSCALL_SPU(sched_setattr) SYSCALL_SPU(sched_getattr) SYSCALL_SPU(renameat2) -- cgit v1.2.3 From ecc8617053e0a97272ef2eee138809f30080e84b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 30 Mar 2015 16:20:03 -0700 Subject: module: add extra argument for parse_params() callback This adds an extra argument onto parse_params() to be used as a way to make the unused callback a bit more useful and generic by allowing the caller to pass on a data structure of its choice. An example use case is to allow us to easily make module parameters for every module which we will do next. @ parse @ identifier name, args, params, num, level_min, level_max; identifier unknown, param, val, doing; type s16; @@ extern char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, const char *doing + , void *arg )); @ parse_mod @ identifier name, args, params, num, level_min, level_max; identifier unknown, param, val, doing; type s16; @@ char *parse_args(const char *name, char *args, const struct kernel_param *params, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, const char *doing + , void *arg )) { ... } @ parse_args_found @ expression R, E1, E2, E3, E4, E5, E6; identifier func; @@ ( R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, func); | R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, &func); | R = parse_args(E1, E2, E3, E4, E5, E6, + NULL, NULL); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, func); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, &func); | parse_args(E1, E2, E3, E4, E5, E6, + NULL, NULL); ) @ parse_args_unused depends on parse_args_found @ identifier parse_args_found.func; @@ int func(char *param, char *val, const char *unused + , void *arg ) { ... 
} @ mod_unused depends on parse_args_found @ identifier parse_args_found.func; expression A1, A2, A3; @@ - func(A1, A2, A3); + func(A1, A2, A3, NULL); Generated-by: Coccinelle SmPL Cc: cocci@systeme.lip6.fr Cc: Tejun Heo Cc: Arjan van de Ven Cc: Greg Kroah-Hartman Cc: Rusty Russell Cc: Christoph Hellwig Cc: Felipe Contreras Cc: Ewan Milne Cc: Jean Delvare Cc: Hannes Reinecke Cc: Jani Nikula Cc: linux-kernel@vger.kernel.org Reviewed-by: Tejun Heo Acked-by: Rusty Russell Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/hugetlbpage.c | 4 ++-- include/linux/moduleparam.h | 3 ++- init/main.c | 25 +++++++++++++++---------- kernel/module.c | 6 ++++-- kernel/params.c | 11 +++++++---- lib/dynamic_debug.c | 4 ++-- 6 files changed, 32 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ce968b00b7c..d230158c9770 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -336,7 +336,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) unsigned long gpage_npages[MMU_PAGE_COUNT]; static int __init do_gpage_early_setup(char *param, char *val, - const char *unused) + const char *unused, void *arg) { static phys_addr_t size; unsigned long npages; @@ -385,7 +385,7 @@ void __init reserve_hugetlb_gpages(void) strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0, - &do_gpage_early_setup); + NULL, &do_gpage_early_setup); /* * Walk gpage list in reverse, allocating larger page sizes first. diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1c9effa25e26..13923709d30d 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -357,8 +357,9 @@ extern char *parse_args(const char *name, unsigned num, s16 level_min, s16 level_max, + void *arg, int (*unknown)(char *param, char *val, - const char *doing)); + const char *doing, void *arg)); /* Called by module remove. */ #ifdef CONFIG_SYSFS diff --git a/init/main.c b/init/main.c index 2115055faeac..edcb13440646 100644 --- a/init/main.c +++ b/init/main.c @@ -235,7 +235,8 @@ static int __init loglevel(char *str) early_param("loglevel", loglevel); /* Change NUL term back to "=", to make "param" the whole string. */ -static int __init repair_env_string(char *param, char *val, const char *unused) +static int __init repair_env_string(char *param, char *val, + const char *unused, void *arg) { if (val) { /* param=val or param="val"? */ @@ -252,14 +253,15 @@ static int __init repair_env_string(char *param, char *val, const char *unused) } /* Anything after -- gets handed straight to init. */ -static int __init set_init_arg(char *param, char *val, const char *unused) +static int __init set_init_arg(char *param, char *val, + const char *unused, void *arg) { unsigned int i; if (panic_later) return 0; - repair_env_string(param, val, unused); + repair_env_string(param, val, unused, NULL); for (i = 0; argv_init[i]; i++) { if (i == MAX_INIT_ARGS) { @@ -276,9 +278,10 @@ static int __init set_init_arg(char *param, char *val, const char *unused) * Unknown boot options get handed to init, unless they look like * unused parameters (modprobe will find them in /proc/cmdline). 
*/ -static int __init unknown_bootoption(char *param, char *val, const char *unused) +static int __init unknown_bootoption(char *param, char *val, + const char *unused, void *arg) { - repair_env_string(param, val, unused); + repair_env_string(param, val, unused, NULL); /* Handle obsolete-style parameters */ if (obsolete_checksetup(param)) @@ -410,7 +413,8 @@ static noinline void __init_refok rest_init(void) } /* Check for early params. */ -static int __init do_early_param(char *param, char *val, const char *unused) +static int __init do_early_param(char *param, char *val, + const char *unused, void *arg) { const struct obs_kernel_param *p; @@ -429,7 +433,8 @@ static int __init do_early_param(char *param, char *val, const char *unused) void __init parse_early_options(char *cmdline) { - parse_args("early options", cmdline, NULL, 0, 0, 0, do_early_param); + parse_args("early options", cmdline, NULL, 0, 0, 0, NULL, + do_early_param); } /* Arch code calls this early on, or if not, just before other parsing. */ @@ -535,10 +540,10 @@ asmlinkage __visible void __init start_kernel(void) after_dashes = parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, - -1, -1, &unknown_bootoption); + -1, -1, NULL, &unknown_bootoption); if (!IS_ERR_OR_NULL(after_dashes)) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, - set_init_arg); + NULL, set_init_arg); jump_label_init(); @@ -847,7 +852,7 @@ static void __init do_initcall_level(int level) initcall_command_line, __start___param, __stop___param - __start___param, level, level, - &repair_env_string); + NULL, &repair_env_string); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) do_one_initcall(*fn); diff --git a/kernel/module.c b/kernel/module.c index 42a1d2afb217..24d1f31d02f2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3237,7 +3237,8 @@ out: return err; } -static int unknown_module_param_cb(char *param, char *val, const char *modname) +static int unknown_module_param_cb(char *param, char *val, const char *modname, + void *arg) { /* Check for magic 'dyndbg' arg */ int ret = ddebug_dyndbg_module_param_cb(param, val, modname); @@ -3342,7 +3343,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Module is ready to execute: parsing args may do that. 
*/ after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, unknown_module_param_cb); + -32768, 32767, NULL, + unknown_module_param_cb); if (IS_ERR(after_dashes)) { err = PTR_ERR(after_dashes); goto bug_cleanup; diff --git a/kernel/params.c b/kernel/params.c index a22d6a759b1a..30288c1e15dd 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -100,8 +100,9 @@ static int parse_one(char *param, unsigned num_params, s16 min_level, s16 max_level, + void *arg, int (*handle_unknown)(char *param, char *val, - const char *doing)) + const char *doing, void *arg)) { unsigned int i; int err; @@ -128,7 +129,7 @@ static int parse_one(char *param, if (handle_unknown) { pr_debug("doing %s: %s='%s'\n", doing, param, val); - return handle_unknown(param, val, doing); + return handle_unknown(param, val, doing, arg); } pr_debug("Unknown argument '%s'\n", param); @@ -194,7 +195,9 @@ char *parse_args(const char *doing, unsigned num, s16 min_level, s16 max_level, - int (*unknown)(char *param, char *val, const char *doing)) + void *arg, + int (*unknown)(char *param, char *val, + const char *doing, void *arg)) { char *param, *val; @@ -214,7 +217,7 @@ char *parse_args(const char *doing, return args; irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, - min_level, max_level, unknown); + min_level, max_level, arg, unknown); if (irq_was_disabled && !irqs_disabled()) pr_warn("%s: option '%s' enabled irq's!\n", doing, param); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index d8f3d3150603..e491e02eff54 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -887,7 +887,7 @@ static int ddebug_dyndbg_param_cb(char *param, char *val, /* handle both dyndbg and $module.dyndbg params at boot */ static int ddebug_dyndbg_boot_param_cb(char *param, char *val, - const char *unused) + const char *unused, void *arg) { vpr_info("%s=\"%s\"\n", param, val); return ddebug_dyndbg_param_cb(param, val, NULL, 0); @@ -1028,7 +1028,7 @@ static int __init dynamic_debug_init(void) */ cmdline = kstrdup(saved_command_line, GFP_KERNEL); parse_args("dyndbg params", cmdline, NULL, - 0, 0, 0, &ddebug_dyndbg_boot_param_cb); + 0, 0, 0, NULL, &ddebug_dyndbg_boot_param_cb); kfree(cmdline); return 0; -- cgit v1.2.3 From e602ffb2fa6b5533acd6847bd19a135e14ed4c54 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Mon, 20 Apr 2015 10:32:56 +0530 Subject: powerpc: Fix cpu_online_cores_map to return only online threads mask Currently, cpu_online_cores_map returns a mask, which for every core with at least one online thread, has the bit for thread 0 of the core set to 1, and the bits for all other threads of the core set to 0. But thread 0 of the core itself may not be online always. In such cases, if the returned mask is used for IPI, then it'll cause IPIs to be skipped on cores where the first thread is offline, because the IPI code refuses to send IPIs to offline threads. Fix this by setting the bit of the first online thread in the core. This is done by fixing this in the underlying function cpu_thread_mask_to_cores. The result has the property that for all cores with online threads, there is one bit set in the returned map. And further, all bits that are set in the returned map correspond to online threads. Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy [ Changelog from Michael Ellerman ] Reviewed-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputhreads.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 5be6c4753667..ba42e46ea58e 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -31,9 +31,9 @@ extern cpumask_t threads_core_mask; /* cpu_thread_mask_to_cores - Return a cpumask of one per cores * hit by the argument * - * @threads: a cpumask of threads + * @threads: a cpumask of online threads * - * This function returns a cpumask which will have one "cpu" (or thread) + * This function returns a cpumask which will have one online cpu's * bit set for each core that has at least one thread set in the argument. * * This can typically be used for things like IPI for tlb invalidations @@ -42,13 +42,16 @@ extern cpumask_t threads_core_mask; static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) { cpumask_t tmp, res; - int i; + int i, cpu; cpumask_clear(&res); for (i = 0; i < NR_CPUS; i += threads_per_core) { cpumask_shift_left(&tmp, &threads_core_mask, i); - if (cpumask_intersects(threads, &tmp)) - cpumask_set_cpu(i, &res); + if (cpumask_intersects(threads, &tmp)) { + cpu = cpumask_next_and(-1, &tmp, cpu_online_mask); + if (cpu < nr_cpu_ids) + cpumask_set_cpu(cpu, &res); + } } return res; } -- cgit v1.2.3 From d405a98c70ebbaa6ef276d7307d034a682321b95 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Mon, 20 Apr 2015 10:32:57 +0530 Subject: powerpc/powernv: Move cpuidle related code from setup.c to new file This is a cleanup patch; doesn't change any functionality. Moves all cpuidle related code from setup.c to a new file. Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy [mpe: Fix the SMP=n build by including asm/smp.h in idle.c] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/idle.c | 192 ++++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 171 ---------------------------- 3 files changed, 193 insertions(+), 172 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/idle.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 33e44f37212f..bee923585afc 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,4 +1,4 @@ -obj-y += setup.o opal-wrappers.o opal.o opal-async.o +obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c new file mode 100644 index 000000000000..5c799324b695 --- /dev/null +++ b/arch/powerpc/platforms/powernv/idle.c @@ -0,0 +1,192 @@ +/* + * PowerNV cpuidle code + * + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "powernv.h" +#include "subcore.h" + +static u32 supported_cpuidle_states; + +int pnv_save_sprs_for_winkle(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross + * all cpus at boot. Get these reg values of current cpu and use the + * same accross all cpus. + */ + uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + + for_each_possible_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)&paca[cpu]; + + /* + * HSPRG0 is used to store the cpu's pointer to paca. Hence last + * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 + * with 63rd bit set, so that when a thread wakes up at 0x100 we + * can use this bit to distinguish between fastsleep and + * deep winkle. + */ + hsprg0_val |= 1; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + + return 0; +} + +static void pnv_alloc_idle_core_states(void) +{ + int i, j; + int nr_cores = cpu_nr_cores(); + u32 *core_idle_state; + + /* + * core_idle_state - First 8 bits track the idle state of each thread + * of the core. The 8th bit is the lock bit. Initially all thread bits + * are set. They are cleared when the thread enters deep idle state + * like sleep and winkle. Initially the lock bit is cleared. + * The lock bit has 2 purposes + * a. While the first thread is restoring core state, it prevents + * other threads in the core from switching to process context. + * b. While the last thread in the core is saving the core state, it + * prevents a different thread from waking up. 
+ */ + for (i = 0; i < nr_cores; i++) { + int first_cpu = i * threads_per_core; + int node = cpu_to_node(first_cpu); + + core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); + *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; + + for (j = 0; j < threads_per_core; j++) { + int cpu = first_cpu + j; + + paca[cpu].core_idle_state_ptr = core_idle_state; + paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; + paca[cpu].thread_mask = 1 << j; + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) + pnv_save_sprs_for_winkle(); +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static int __init pnv_init_idle_states(void) +{ + struct device_node *power_mgt; + int dt_idle_states; + u32 *flags; + int i; + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + goto out; + + if (!firmware_has_feature(FW_FEATURE_OPALv3)) + goto out; + + power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!power_mgt) { + pr_warn("opal: PowerMgmt Node not found\n"); + goto out; + } + dt_idle_states = of_property_count_u32_elems(power_mgt, + "ibm,cpu-idle-state-flags"); + if (dt_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + goto out; + } + + flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + goto out_free; + } + + for (i = 0; i < dt_idle_states; i++) + supported_cpuidle_states |= flags[i]; + + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + } + pnv_alloc_idle_core_states(); +out_free: + kfree(flags); +out: + return 0; +} + +subsys_initcall(pnv_init_idle_states); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 16fdcb23f4c3..509cdd2f7ad8 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,12 +35,8 @@ #include #include #include -#include -#include -#include #include "powernv.h" -#include "subcore.h" static void __init pnv_setup_arch(void) { @@ -277,173 +273,6 @@ static void __init pnv_setup_machdep_opal(void) ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } -static u32 supported_cpuidle_states; - -int pnv_save_sprs_for_winkle(void) -{ - int cpu; - int rc; - - /* - * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross - * all cpus at boot. Get these reg values of current cpu and use the - * same accross all cpus. - */ - uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; - uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); - uint64_t hmeer_val = mfspr(SPRN_HMEER); - - for_each_possible_cpu(cpu) { - uint64_t pir = get_hard_smp_processor_id(cpu); - uint64_t hsprg0_val = (uint64_t)&paca[cpu]; - - /* - * HSPRG0 is used to store the cpu's pointer to paca. Hence last - * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 - * with 63rd bit set, so that when a thread wakes up at 0x100 we - * can use this bit to distinguish between fastsleep and - * deep winkle. 
- */ - hsprg0_val |= 1; - - rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); - if (rc != 0) - return rc; - - /* HIDs are per core registers */ - if (cpu_thread_in_core(cpu) == 0) { - - rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); - if (rc != 0) - return rc; - - rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); - if (rc != 0) - return rc; - } - } - - return 0; -} - -static void pnv_alloc_idle_core_states(void) -{ - int i, j; - int nr_cores = cpu_nr_cores(); - u32 *core_idle_state; - - /* - * core_idle_state - First 8 bits track the idle state of each thread - * of the core. The 8th bit is the lock bit. Initially all thread bits - * are set. They are cleared when the thread enters deep idle state - * like sleep and winkle. Initially the lock bit is cleared. - * The lock bit has 2 purposes - * a. While the first thread is restoring core state, it prevents - * other threads in the core from switching to process context. - * b. While the last thread in the core is saving the core state, it - * prevents a different thread from waking up. - */ - for (i = 0; i < nr_cores; i++) { - int first_cpu = i * threads_per_core; - int node = cpu_to_node(first_cpu); - - core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); - *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; - - for (j = 0; j < threads_per_core; j++) { - int cpu = first_cpu + j; - - paca[cpu].core_idle_state_ptr = core_idle_state; - paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; - paca[cpu].thread_mask = 1 << j; - } - } - - update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) - pnv_save_sprs_for_winkle(); -} - -u32 pnv_get_supported_cpuidle_states(void) -{ - return supported_cpuidle_states; -} -EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); - -static int __init pnv_init_idle_states(void) -{ - struct device_node *power_mgt; - int dt_idle_states; - u32 *flags; - int i; - - supported_cpuidle_states = 0; - - if (cpuidle_disable != IDLE_NO_OVERRIDE) - goto out; - - if (!firmware_has_feature(FW_FEATURE_OPALv3)) - goto out; - - power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); - if (!power_mgt) { - pr_warn("opal: PowerMgmt Node not found\n"); - goto out; - } - dt_idle_states = of_property_count_u32_elems(power_mgt, - "ibm,cpu-idle-state-flags"); - if (dt_idle_states < 0) { - pr_warn("cpuidle-powernv: no idle states found in the DT\n"); - goto out; - } - - flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); - if (of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); - goto out_free; - } - - for (i = 0; i < dt_idle_states; i++) - supported_cpuidle_states |= flags[i]; - - if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - } - pnv_alloc_idle_core_states(); -out_free: - kfree(flags); -out: - return 0; -} - -subsys_initcall(pnv_init_idle_states); - static int __init pnv_probe(void) { unsigned long root = of_get_flat_dt_root(); -- cgit v1.2.3 From 
5703d2f4a1da6d23b3be896947ce255226fc4295 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Mon, 20 Apr 2015 10:32:58 +0530 Subject: powerpc/powernv: Introduce sysfs control for fastsleep workaround behavior Fastsleep is one of the idle states which the cpuidle subsystem currently uses on power8 machines. In this state the L2 cache is brought down to a threshold voltage. Therefore when the core is in fastsleep, the communication between L2 and L3 needs to be fenced. But there is a bug in the current power8 chips surrounding this fencing. OPAL provides a workaround which precludes the possibility of hitting this bug. But running with this workaround applied causes a checkstop if any correctable error in the L2 cache directory is detected. Hence OPAL also provides a way to undo the workaround. In the existing implementation, the workaround is applied by the last thread of the core entering fastsleep and undone by the first thread waking up. But this has a performance cost. These OPAL calls account for roughly 4000 cycles every time the core has to enter or wake up from fastsleep. This patch introduces a sysfs attribute (fastsleep_workaround_applyonce) to choose the behavior of this workaround. By default, fastsleep_workaround_applyonce = 0; in this case, the workaround is applied/undone every time the core enters/exits fastsleep. With fastsleep_workaround_applyonce = 1, the workaround is applied once on all the cores and never undone. This can be triggered by echo 1 > /sys/devices/system/cpu/fastsleep_workaround_applyonce. For simplicity this attribute can be modified only once; that is, once fastsleep_workaround_applyonce is changed to 1, it cannot be reverted to the default state. Signed-off-by: Shreyas B. Prabhu Reviewed-by: Preeti U Murthy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 7 ++ arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/platforms/powernv/idle.c | 101 +++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 110 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0321a909e663..a49e5fa6f939 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -165,6 +165,13 @@ #define OPAL_PM_WINKLE_ENABLED 0x00040000 #define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 /* with workaround */ +/* + * OPAL_CONFIG_CPU_IDLE_STATE parameters + */ +#define OPAL_CONFIG_IDLE_FASTSLEEP 1 +#define OPAL_CONFIG_IDLE_UNDO 0 +#define OPAL_CONFIG_IDLE_APPLY 1 + #ifndef __ASSEMBLY__ /* Other enums */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 042af1abfc4d..9a4781357fa6 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -186,6 +186,7 @@ int64_t opal_handle_hmi(void); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); +int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag); int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t msg_len); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 5c799324b695..bd39a120bd60 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -13,6 +13,8 @@ #include #include
#include +#include +#include #include #include @@ -137,6 +139,96 @@ u32 pnv_get_supported_cpuidle_states(void) } EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static void pnv_fastsleep_workaround_apply(void *info) + +{ + int rc; + int *err = info; + + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + if (rc) + *err = 1; +} + +/* + * Used to store fastsleep workaround state + * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) + * 1 - Workaround applied once, never undone. + */ +static u8 fastsleep_workaround_applyonce; + +static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); +} + +static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + cpumask_t primary_thread_mask; + int err; + u8 val; + + if (kstrtou8(buf, 0, &val) || val != 1) + return -EINVAL; + + if (fastsleep_workaround_applyonce == 1) + return count; + + /* + * fastsleep_workaround_applyonce = 1 implies + * fastsleep workaround needs to be left in 'applied' state on all + * the cores. Do this by- + * 1. Patching out the call to 'undo' workaround in fastsleep exit path + * 2. Sending ipi to all the cores which have atleast one online thread + * 3. Patching out the call to 'apply' workaround in fastsleep entry + * path + * There is no need to send ipi to cores which have all threads + * offlined, as last thread of the core entering fastsleep or deeper + * state would have applied workaround. + */ + err = patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_exit, + PPC_INST_NOP); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); + goto fail; + } + + get_online_cpus(); + primary_thread_mask = cpu_online_cores_map(); + on_each_cpu_mask(&primary_thread_mask, + pnv_fastsleep_workaround_apply, + &err, 1); + put_online_cpus(); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); + goto fail; + } + + err = patch_instruction( + (unsigned int *)pnv_fastsleep_workaround_at_entry, + PPC_INST_NOP); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); + goto fail; + } + + fastsleep_workaround_applyonce = 1; + + return count; +fail: + return -EIO; +} + +static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, + show_fastsleep_workaround_applyonce, + store_fastsleep_workaround_applyonce); + static int __init pnv_init_idle_states(void) { struct device_node *power_mgt; @@ -181,7 +273,16 @@ static int __init pnv_init_idle_states(void) patch_instruction( (unsigned int *)pnv_fastsleep_workaround_at_exit, PPC_INST_NOP); + } else { + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be applied. 
+ */ + device_create_file(cpu_subsys.dev_root, + &dev_attr_fastsleep_workaround_applyonce); } + pnv_alloc_idle_core_states(); out_free: kfree(flags); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index a7ade94cdf87..bf15ead00e12 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -283,6 +283,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); -- cgit v1.2.3 From 96e023e7534c16ab54e236c114340e2447c36d2f Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:36 +1000 Subject: powerpc/powernv: Reorder OPAL subsystem initialisation Most of the OPAL subsystems are always compiled in for PowerNV and many of them need to be initialised before or after other OPAL subsystems. Rather than trying to control this ordering through machine initcalls it is clearer and easier to control initialisation order with explicit calls in opal_init. Signed-off-by: Alistair Popple Cc: Mahesh Jagannath Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 +++ arch/powerpc/platforms/powernv/opal-async.c | 3 +-- arch/powerpc/platforms/powernv/opal-hmi.c | 3 +-- arch/powerpc/platforms/powernv/opal-memory-errors.c | 2 +- arch/powerpc/platforms/powernv/opal-sensor.c | 3 +-- arch/powerpc/platforms/powernv/opal.c | 13 ++++++++++++- 6 files changed, 19 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9a4781357fa6..dcdf83c86256 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -240,6 +240,9 @@ extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); extern void opal_msglog_init(void); +extern int opal_async_comp_init(void); +extern int opal_sensor_init(void); +extern int opal_hmi_handler_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index 693b6cdac691..bdc8c0c71d15 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -151,7 +151,7 @@ static struct notifier_block opal_async_comp_nb = { .priority = 0, }; -static int __init opal_async_comp_init(void) +int __init opal_async_comp_init(void) { struct device_node *opal_node; const __be32 *async; @@ -205,4 +205,3 @@ out_opal_node: out: return err; } -machine_subsys_initcall(powernv, opal_async_comp_init); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index b322bfb51343..a8f49d380449 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -170,7 +170,7 @@ static struct notifier_block opal_hmi_handler_nb = { .priority = 0, }; -static int __init opal_hmi_handler_init(void) +int __init opal_hmi_handler_init(void) { int ret; @@ -186,4 +186,3 @@ static int __init opal_hmi_handler_init(void) } return 0; } 
-machine_subsys_initcall(powernv, opal_hmi_handler_init); diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index 43db2136dbff..00a29432be39 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -144,4 +144,4 @@ static int __init opal_mem_err_init(void) } return 0; } -machine_subsys_initcall(powernv, opal_mem_err_init); +machine_device_initcall(powernv, opal_mem_err_init); diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 655250499d18..a06059df9239 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -77,7 +77,7 @@ out: } EXPORT_SYMBOL_GPL(opal_get_sensor_data); -static __init int opal_sensor_init(void) +int __init opal_sensor_init(void) { struct platform_device *pdev; struct device_node *sensor; @@ -93,4 +93,3 @@ static __init int opal_sensor_init(void) return PTR_ERR_OR_ZERO(pdev); } -machine_subsys_initcall(powernv, opal_sensor_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2241565b0739..eb3decc67c43 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -393,7 +393,6 @@ static int __init opal_message_init(void) } return 0; } -machine_early_initcall(powernv, opal_message_init); int opal_get_chars(uint32_t vtermno, char *buf, int count) { @@ -807,6 +806,18 @@ static int __init opal_init(void) of_node_put(consoles); } + /* Initialise OPAL messaging system */ + opal_message_init(); + + /* Initialise OPAL asynchronous completion interface */ + opal_async_comp_init(); + + /* Initialise OPAL sensor interface */ + opal_sensor_init(); + + /* Initialise OPAL hypervisor maintainence interrupt handling */ + opal_hmi_handler_init(); + /* Create i2c platform devices */ opal_i2c_create_devs(); -- cgit v1.2.3 From 9f0fd0499d30dbd61632463f293e2e826fa363b1 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:37 +1000 Subject: powerpc/powernv: Add a virtual irqchip for opal events Whenever an interrupt is received for opal the linux kernel gets a bitfield indicating certain events that have occurred and need handling by the various device drivers. Currently this is handled using a notifier interface where we call every device driver that has registered to receive opal events. This approach has several drawbacks. For example each driver has to do its own checking to see if the event is relevant as well as event masking. There is also no easy method of recording the number of times we receive particular events. This patch solves these issues by exposing opal events via the standard interrupt APIs by adding a new interrupt chip and domain. Drivers can then register for the appropriate events using standard kernel calls such as irq_of_parse_and_map(). 
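In practice a consumer without a device tree binding ends up with code along the following lines; this is only an illustrative sketch modelled on the elog and opal-msg conversions later in this series (the example_* names are made up and error handling is trimmed):

static irqreturn_t example_opal_event_handler(int irq, void *data)
{
	/*
	 * The event stays asserted until the underlying condition is
	 * cleared through OPAL, so it behaves as a level interrupt.
	 */
	return IRQ_HANDLED;
}

static int __init example_opal_event_init(void)
{
	int irq;

	/* Translate an OPAL event bit into a Linux virq */
	irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL));
	if (!irq)
		return -ENODEV;

	return request_irq(irq, example_opal_event_handler,
			   IRQ_TYPE_LEVEL_HIGH, "example-opal-event", NULL);
}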
Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-irqchip.c | 253 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 78 ++------ arch/powerpc/platforms/powernv/powernv.h | 4 + 5 files changed, 273 insertions(+), 67 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-irqchip.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index dcdf83c86256..1412814347ba 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -243,6 +243,7 @@ extern void opal_msglog_init(void); extern int opal_async_comp_init(void); extern int opal_sensor_init(void); extern int opal_hmi_handler_init(void); +extern int opal_event_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); @@ -254,6 +255,8 @@ extern int opal_resync_timebase(void); extern void opal_lpc_init(void); +extern int opal_event_request(unsigned int opal_event_nr); + struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, unsigned long vmalloc_size); void opal_free_sg_list(struct opal_sg_list *sg); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index bee923585afc..26bd7e417b7c 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,7 +1,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o opal-hmi.o opal-power.o +obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c new file mode 100644 index 000000000000..bd5125dcf0d7 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -0,0 +1,253 @@ +/* + * This file implements an irqchip for OPAL events. Whenever there is + * an interrupt that is handled by OPAL we get passed a list of events + * that Linux needs to do something about. These basically look like + * interrupts to Linux so we implement an irqchip to handle them. + * + * Copyright Alistair Popple, IBM Corporation 2014. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "powernv.h" + +/* Maximum number of events supported by OPAL firmware */ +#define MAX_NUM_EVENTS 64 + +struct opal_event_irqchip { + struct irq_chip irqchip; + struct irq_domain *domain; + unsigned long mask; +}; +static struct opal_event_irqchip opal_event_irqchip; + +static unsigned int opal_irq_count; +static unsigned int *opal_irqs; + +static void opal_handle_irq_work(struct irq_work *work); +static __be64 last_outstanding_events; +static struct irq_work opal_event_irq_work = { + .func = opal_handle_irq_work, +}; + +static void opal_event_mask(struct irq_data *d) +{ + clear_bit(d->hwirq, &opal_event_irqchip.mask); +} + +static void opal_event_unmask(struct irq_data *d) +{ + set_bit(d->hwirq, &opal_event_irqchip.mask); + + opal_poll_events(&last_outstanding_events); + if (last_outstanding_events & opal_event_irqchip.mask) + /* Need to retrigger the interrupt */ + irq_work_queue(&opal_event_irq_work); +} + +static int opal_event_set_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * For now we only support level triggered events. The irq + * handler will be called continuously until the event has + * been cleared in OPAL. + */ + if (flow_type != IRQ_TYPE_LEVEL_HIGH) + return -EINVAL; + + return 0; +} + +static struct opal_event_irqchip opal_event_irqchip = { + .irqchip = { + .name = "OPAL EVT", + .irq_mask = opal_event_mask, + .irq_unmask = opal_event_unmask, + .irq_set_type = opal_event_set_type, + }, + .mask = 0, +}; + +static int opal_event_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, &opal_event_irqchip); + irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip, + handle_level_irq); + + return 0; +} + +void opal_handle_events(uint64_t events) +{ + int virq, hwirq = 0; + u64 mask = opal_event_irqchip.mask; + u64 notifier_mask = 0; + + if (!in_irq() && (events & mask)) { + last_outstanding_events = events; + irq_work_queue(&opal_event_irq_work); + return; + } + + while (events) { + hwirq = fls64(events) - 1; + virq = irq_find_mapping(opal_event_irqchip.domain, + hwirq); + if (virq) { + if (BIT_ULL(hwirq) & mask) + generic_handle_irq(virq); + } else + notifier_mask |= BIT_ULL(hwirq); + events &= ~BIT_ULL(hwirq); + } + + opal_do_notifier(notifier_mask); +} + +static irqreturn_t opal_interrupt(int irq, void *data) +{ + __be64 events; + + opal_handle_interrupt(virq_to_hw(irq), &events); + opal_handle_events(be64_to_cpu(events)); + + return IRQ_HANDLED; +} + +static void opal_handle_irq_work(struct irq_work *work) +{ + opal_handle_events(be64_to_cpu(last_outstanding_events)); +} + +static int opal_event_match(struct irq_domain *h, struct device_node *node) +{ + return h->of_node == node; +} + +static int opal_event_xlate(struct irq_domain *h, struct device_node *np, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + + return 0; +} + +static const struct irq_domain_ops opal_event_domain_ops = { + .match = opal_event_match, + .map = opal_event_map, + .xlate = opal_event_xlate, +}; + +void opal_event_shutdown(void) +{ + unsigned int i; + + /* First free interrupts, which will also mask them */ + for (i = 0; i < opal_irq_count; i++) { + if (opal_irqs[i]) + free_irq(opal_irqs[i], NULL); + opal_irqs[i] = 0; + } +} + +int __init opal_event_init(void) +{ + struct 
device_node *dn, *opal_node; + const __be32 *irqs; + int i, irqlen, rc = 0; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("opal: Node not found\n"); + return -ENODEV; + } + + /* If dn is NULL it means the domain won't be linked to a DT + * node so therefore irq_of_parse_and_map(...) wont work. But + * that shouldn't be problem because if we're running a + * version of skiboot that doesn't have the dn then the + * devices won't have the correct properties and will have to + * fall back to the legacy method (opal_event_request(...)) + * anyway. */ + dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event"); + opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS, + &opal_event_domain_ops, &opal_event_irqchip); + of_node_put(dn); + if (!opal_event_irqchip.domain) { + pr_warn("opal: Unable to create irq domain\n"); + rc = -ENOMEM; + goto out; + } + + /* Get interrupt property */ + irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); + opal_irq_count = irqs ? (irqlen / 4) : 0; + pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + + /* Install interrupt handlers */ + opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); + for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { + unsigned int irq, virq; + + /* Get hardware and virtual IRQ */ + irq = be32_to_cpup(irqs); + virq = irq_create_mapping(NULL, irq); + if (virq == NO_IRQ) { + pr_warn("Failed to map irq 0x%x\n", irq); + continue; + } + + /* Install interrupt handler */ + rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + if (rc) { + irq_dispose_mapping(virq); + pr_warn("Error %d requesting irq %d (0x%x)\n", + rc, virq, irq); + continue; + } + + /* Cache IRQ */ + opal_irqs[i] = virq; + } + +out: + of_node_put(opal_node); + return rc; +} + +/** + * opal_event_request(unsigned int opal_event_nr) - Request an event + * @opal_event_nr: the opal event number to request + * + * This routine can be used to find the linux virq number which can + * then be passed to request_irq to assign a handler for a particular + * opal event. This should only be used by legacy devices which don't + * have proper device tree bindings. Most devices should use + * irq_of_parse_and_map() instead. 
+ */ +int opal_event_request(unsigned int opal_event_nr) +{ + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); +} +EXPORT_SYMBOL(opal_event_request); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index eb3decc67c43..3baca718261a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -53,8 +53,6 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -static unsigned int *opal_irqs; -static unsigned int opal_irq_count; static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; static DEFINE_SPINLOCK(opal_notifier_lock); @@ -251,7 +249,7 @@ int opal_notifier_unregister(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(opal_notifier_unregister); -static void opal_do_notifier(uint64_t events) +void opal_do_notifier(uint64_t events) { unsigned long flags; uint64_t changed_mask; @@ -571,8 +569,10 @@ int opal_handle_hmi_exception(struct pt_regs *regs) local_paca->hmi_event_available = 0; rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) + if (rc == OPAL_SUCCESS && evt) { opal_do_notifier(be64_to_cpu(evt)); + opal_handle_events(be64_to_cpu(evt)); + } return 1; } @@ -609,17 +609,6 @@ out: return !!recover_addr; } -static irqreturn_t opal_interrupt(int irq, void *data) -{ - __be64 events; - - opal_handle_interrupt(virq_to_hw(irq), &events); - - opal_do_notifier(be64_to_cpu(events)); - - return IRQ_HANDLED; -} - static int opal_sysfs_init(void) { opal_kobj = kobject_create_and_add("opal", firmware_kobj); @@ -718,52 +707,15 @@ static void opal_i2c_create_devs(void) of_platform_device_create(np, NULL, NULL); } -static void __init opal_irq_init(struct device_node *dn) -{ - const __be32 *irqs; - int i, irqlen; - - /* Get interrupt property */ - irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); - opal_irq_count = irqs ? 
(irqlen / 4) : 0; - pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); - if (!opal_irq_count) - return; - - /* Install interrupt handlers */ - opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); - for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { - unsigned int irq, virq; - int rc; - - /* Get hardware and virtual IRQ */ - irq = be32_to_cpup(irqs); - virq = irq_create_mapping(NULL, irq); - if (virq == NO_IRQ) { - pr_warn("Failed to map irq 0x%x\n", irq); - continue; - } - - /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); - if (rc) { - irq_dispose_mapping(virq); - pr_warn("Error %d requesting irq %d (0x%x)\n", - rc, virq, irq); - continue; - } - - /* Cache IRQ */ - opal_irqs[i] = virq; - } -} - static int kopald(void *unused) { + __be64 events; + set_freezable(); do { try_to_freeze(); - opal_poll_events(NULL); + opal_poll_events(&events); + opal_handle_events(be64_to_cpu(events)); msleep_interruptible(opal_heartbeat); } while (!kthread_should_stop()); @@ -792,6 +744,9 @@ static int __init opal_init(void) return -ENODEV; } + /* Initialise OPAL events */ + opal_event_init(); + /* Register OPAL consoles if any ports */ if (firmware_has_feature(FW_FEATURE_OPALv2)) consoles = of_find_node_by_path("/ibm,opal/consoles"); @@ -824,9 +779,6 @@ static int __init opal_init(void) /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); - /* Find all OPAL interrupts and request them */ - opal_irq_init(opal_node); - /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { @@ -857,15 +809,9 @@ machine_subsys_initcall(powernv, opal_init); void opal_shutdown(void) { - unsigned int i; long rc = OPAL_BUSY; - /* First free interrupts, which will also mask them */ - for (i = 0; i < opal_irq_count; i++) { - if (opal_irqs[i]) - free_irq(opal_irqs[i], NULL); - opal_irqs[i] = 0; - } + opal_event_shutdown(); /* * Then sync with OPAL which ensure anything that can diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 826d2c9bea56..221d4c827fc5 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,6 +35,10 @@ extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); +extern void opal_do_notifier(uint64_t events); +extern void opal_handle_events(uint64_t events); +extern void opal_event_shutdown(void); + bool cpu_core_split_required(void); #endif /* _POWERNV_H */ -- cgit v1.2.3 From 79231448c929cc0870f6e5cc10c485661dfb4a9f Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:40 +1000 Subject: powernv/eeh: Update the EEH code to use the opal irq domain The eeh code currently uses the old notifier method to get eeh events from OPAL. It also contains some logic to filter opal events which has been moved into the virtual irqchip. This patch converts the eeh code to the new event interface which simplifies event handling. 
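With the event exposed as an interrupt, the handler only has to hold off further events while the EEH core drains its queue. Stripped to its essentials, the flow is as follows (a condensed sketch of the hunks below, not additional code):

static irqreturn_t pnv_eeh_event(int irq, void *data)
{
	/*
	 * Mask further PCI error events until the outstanding ones
	 * have been processed by the EEH core.
	 */
	disable_irq_nosync(irq);

	if (eeh_enabled())
		eeh_send_failure_event(NULL);

	return IRQ_HANDLED;
}

with the interrupt re-enabled from pnv_eeh_next_error() once the event queue has been purged.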
Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 58 +++++++++++++++------------- 1 file changed, 31 insertions(+), 27 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 622f08cf54b6..5cf5e6ea213b 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include "pci.h" static bool pnv_eeh_nb_init = false; +static int eeh_event_irq = -EINVAL; /** * pnv_eeh_init - EEH platform dependent initialization @@ -88,34 +90,22 @@ static int pnv_eeh_init(void) return 0; } -static int pnv_eeh_event(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t pnv_eeh_event(int irq, void *data) { - uint64_t changed_evts = (uint64_t)change; - /* - * We simply send special EEH event if EEH has - * been enabled, or clear pending events in - * case that we enable EEH soon + * We simply send a special EEH event if EEH has been + * enabled. We don't care about EEH events until we've + * finished processing the outstanding ones. Event processing + * gets unmasked in next_error() if EEH is enabled. */ - if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || - !(events & OPAL_EVENT_PCI_ERROR)) - return 0; + disable_irq_nosync(irq); if (eeh_enabled()) eeh_send_failure_event(NULL); - else - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - return 0; + return IRQ_HANDLED; } -static struct notifier_block pnv_eeh_nb = { - .notifier_call = pnv_eeh_event, - .next = NULL, - .priority = 0 -}; - #ifdef CONFIG_DEBUG_FS static ssize_t pnv_eeh_ei_write(struct file *filp, const char __user *user_buf, @@ -237,16 +227,28 @@ static int pnv_eeh_post_init(void) /* Register OPAL event notifier */ if (!pnv_eeh_nb_init) { - ret = opal_notifier_register(&pnv_eeh_nb); - if (ret) { - pr_warn("%s: Can't register OPAL event notifier (%d)\n", - __func__, ret); + eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); + if (eeh_event_irq < 0) { + pr_err("%s: Can't register OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return eeh_event_irq; + } + + ret = request_irq(eeh_event_irq, pnv_eeh_event, + IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); + if (ret < 0) { + irq_dispose_mapping(eeh_event_irq); + pr_err("%s: Can't request OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); return ret; } pnv_eeh_nb_init = true; } + if (!eeh_enabled()) + disable_irq(eeh_event_irq); + list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; @@ -1303,12 +1305,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) int state, ret = EEH_NEXT_ERR_NONE; /* - * While running here, it's safe to purge the event queue. - * And we should keep the cached OPAL notifier event sychronized - * between the kernel and firmware. + * While running here, it's safe to purge the event queue. The + * event should still be masked. 
*/ eeh_remove_event(NULL, false); - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { /* @@ -1477,6 +1477,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) break; } + /* Unmask the event */ + if (eeh_enabled()) + enable_irq(eeh_event_irq); + return ret; } -- cgit v1.2.3 From a295af24d0d2af238d74a994603407b72de157b3 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:41 +1000 Subject: powernv/opal: Convert opal message events to opal irq domain This patch converts the opal message event to use the new opal irq domain. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 3baca718261a..cd5718b74d7c 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -362,33 +362,34 @@ static void opal_handle_message(void) opal_message_do_notify(type, (void *)&msg); } -static int opal_message_notify(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t opal_message_notify(int irq, void *data) { - if (events & OPAL_EVENT_MSG_PENDING) - opal_handle_message(); - return 0; + opal_handle_message(); + return IRQ_HANDLED; } -static struct notifier_block opal_message_nb = { - .notifier_call = opal_message_notify, - .next = NULL, - .priority = 0, -}; - static int __init opal_message_init(void) { - int ret, i; + int ret, i, irq; for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); - ret = opal_notifier_register(&opal_message_nb); + irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + ret = request_irq(irq, opal_message_notify, + IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL); if (ret) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", + pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, ret); return ret; } + return 0; } -- cgit v1.2.3 From 74159a70283ba685cd4d856cfc4438596524b1f2 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:42 +1000 Subject: powernv/elog: Convert elog to opal irq domain This patch converts the elog code to use the opal irq domain instead of notifier events. 
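The elog handler itself only schedules a work item to fetch the log. A driver that wants to sleep directly in its handler can take the event as a threaded interrupt instead, which is what the opal-dump conversion further below does; a minimal sketch of that variant (the init wrapper and its name are illustrative, process_dump is the dump driver's threaded handler):

static void __init example_dump_irq_init(void)
{
	int dump_irq;

	dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL));
	if (!dump_irq)
		return;

	/* IRQF_ONESHOT keeps the event masked until the thread finishes */
	request_threaded_irq(dump_irq, NULL, process_dump,
			     IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
			     "opal-dump", NULL);
}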
Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 38ce757e5e2a..4949ef0d9400 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include #include @@ -276,24 +277,15 @@ static void elog_work_fn(struct work_struct *work) static DECLARE_WORK(elog_work, elog_work_fn); -static int elog_event(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t elog_event(int irq, void *data) { - /* check for error log event */ - if (events & OPAL_EVENT_ERROR_LOG_AVAIL) - schedule_work(&elog_work); - return 0; + schedule_work(&elog_work); + return IRQ_HANDLED; } -static struct notifier_block elog_nb = { - .notifier_call = elog_event, - .next = NULL, - .priority = 0 -}; - int __init opal_elog_init(void) { - int rc = 0; + int rc = 0, irq; /* ELOG not supported by firmware */ if (!opal_check_token(OPAL_ELOG_READ)) @@ -305,10 +297,18 @@ int __init opal_elog_init(void) return -1; } - rc = opal_notifier_register(&elog_nb); + irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + rc = request_irq(irq, elog_event, + IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); if (rc) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", - __func__, rc); + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); return rc; } -- cgit v1.2.3 From 8034f715f0a5fce97b14bacd47eefb1513316b17 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:43 +1000 Subject: powernv/opal-dump: Convert to irq domain Convert the opal dump driver to the new opal irq domain. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-dump.c | 56 +++++++++--------------------- 1 file changed, 17 insertions(+), 39 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 5aa9c1ce4de3..2ee96431f736 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -60,7 +61,7 @@ static ssize_t dump_type_show(struct dump_obj *dump_obj, struct dump_attribute *attr, char *buf) { - + return sprintf(buf, "0x%x %s\n", dump_obj->type, dump_type_to_string(dump_obj->type)); } @@ -363,7 +364,7 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, return dump; } -static int process_dump(void) +static irqreturn_t process_dump(int irq, void *data) { int rc; uint32_t dump_id, dump_size, dump_type; @@ -387,45 +388,13 @@ static int process_dump(void) if (!dump) return -1; - return 0; -} - -static void dump_work_fn(struct work_struct *work) -{ - process_dump(); + return IRQ_HANDLED; } -static DECLARE_WORK(dump_work, dump_work_fn); - -static void schedule_process_dump(void) -{ - schedule_work(&dump_work); -} - -/* - * New dump available notification - * - * Once we get notification, we add sysfs entries for it. - * We only fetch the dump on demand, and create sysfs asynchronously. 
- */ -static int dump_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - if (events & OPAL_EVENT_DUMP_AVAIL) - schedule_process_dump(); - - return 0; -} - -static struct notifier_block dump_nb = { - .notifier_call = dump_event, - .next = NULL, - .priority = 0 -}; - void __init opal_platform_dump_init(void) { int rc; + int dump_irq; /* ELOG not supported by firmware */ if (!opal_check_token(OPAL_DUMP_READ)) @@ -445,10 +414,19 @@ void __init opal_platform_dump_init(void) return; } - rc = opal_notifier_register(&dump_nb); + dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL)); + if (!dump_irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, dump_irq); + return; + } + + rc = request_threaded_irq(dump_irq, NULL, process_dump, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "opal-dump", NULL); if (rc) { - pr_warn("%s: Can't register OPAL event notifier (%d)\n", - __func__, rc); + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); return; } -- cgit v1.2.3 From 81f2f7ce4c5bb688ad691cb3ee37e81ca26a8a3b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 15 May 2015 14:06:44 +1000 Subject: opal: Remove events notifier All users of the old opal events notifier have been converted over to the irq domain so remove the event notifier functions. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-irqchip.c | 16 ++--- arch/powerpc/platforms/powernv/opal.c | 84 +-------------------------- arch/powerpc/platforms/powernv/powernv.h | 1 - arch/powerpc/platforms/powernv/setup.c | 2 +- 4 files changed, 8 insertions(+), 95 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index bd5125dcf0d7..841135f48981 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -100,7 +100,6 @@ void opal_handle_events(uint64_t events) { int virq, hwirq = 0; u64 mask = opal_event_irqchip.mask; - u64 notifier_mask = 0; if (!in_irq() && (events & mask)) { last_outstanding_events = events; @@ -108,19 +107,16 @@ void opal_handle_events(uint64_t events) return; } - while (events) { + while (events & mask) { hwirq = fls64(events) - 1; - virq = irq_find_mapping(opal_event_irqchip.domain, - hwirq); - if (virq) { - if (BIT_ULL(hwirq) & mask) + if (BIT_ULL(hwirq) & mask) { + virq = irq_find_mapping(opal_event_irqchip.domain, + hwirq); + if (virq) generic_handle_irq(virq); - } else - notifier_mask |= BIT_ULL(hwirq); + } events &= ~BIT_ULL(hwirq); } - - opal_do_notifier(notifier_mask); } static irqreturn_t opal_interrupt(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cd5718b74d7c..8403307c5362 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -53,11 +53,7 @@ static int mc_recoverable_range_len; struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); -static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; -static DEFINE_SPINLOCK(opal_notifier_lock); -static uint64_t last_notified_mask = 0x0ul; -static atomic_t opal_notifier_hold = ATOMIC_INIT(0); static uint32_t opal_heartbeat; static void opal_reinit_cores(void) @@ -223,82 +219,6 @@ static int __init opal_register_exception_handlers(void) } machine_early_initcall(powernv, opal_register_exception_handlers); -int 
opal_notifier_register(struct notifier_block *nb) -{ - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - - atomic_notifier_chain_register(&opal_notifier_head, nb); - return 0; -} -EXPORT_SYMBOL_GPL(opal_notifier_register); - -int opal_notifier_unregister(struct notifier_block *nb) -{ - if (!nb) { - pr_warning("%s: Invalid argument (%p)\n", - __func__, nb); - return -EINVAL; - } - - atomic_notifier_chain_unregister(&opal_notifier_head, nb); - return 0; -} -EXPORT_SYMBOL_GPL(opal_notifier_unregister); - -void opal_do_notifier(uint64_t events) -{ - unsigned long flags; - uint64_t changed_mask; - - if (atomic_read(&opal_notifier_hold)) - return; - - spin_lock_irqsave(&opal_notifier_lock, flags); - changed_mask = last_notified_mask ^ events; - last_notified_mask = events; - spin_unlock_irqrestore(&opal_notifier_lock, flags); - - /* - * We feed with the event bits and changed bits for - * enough information to the callback. - */ - atomic_notifier_call_chain(&opal_notifier_head, - events, (void *)changed_mask); -} - -void opal_notifier_update_evt(uint64_t evt_mask, - uint64_t evt_val) -{ - unsigned long flags; - - spin_lock_irqsave(&opal_notifier_lock, flags); - last_notified_mask &= ~evt_mask; - last_notified_mask |= evt_val; - spin_unlock_irqrestore(&opal_notifier_lock, flags); -} - -void opal_notifier_enable(void) -{ - int64_t rc; - __be64 evt = 0; - - atomic_set(&opal_notifier_hold, 0); - - /* Process pending events */ - rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) - opal_do_notifier(be64_to_cpu(evt)); -} - -void opal_notifier_disable(void) -{ - atomic_set(&opal_notifier_hold, 1); -} - /* * Opal message notifier based on message type. Allow subscribers to get * notified for specific messgae type. @@ -570,10 +490,8 @@ int opal_handle_hmi_exception(struct pt_regs *regs) local_paca->hmi_event_available = 0; rc = opal_poll_events(&evt); - if (rc == OPAL_SUCCESS && evt) { - opal_do_notifier(be64_to_cpu(evt)); + if (rc == OPAL_SUCCESS && evt) opal_handle_events(be64_to_cpu(evt)); - } return 1; } diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 221d4c827fc5..f907f0a494da 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,7 +35,6 @@ extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); -extern void opal_do_notifier(uint64_t events); extern void opal_handle_events(uint64_t events); extern void opal_event_shutdown(void); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 509cdd2f7ad8..15e9337b392c 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -107,7 +107,7 @@ static void pnv_prepare_going_down(void) * Disable all notifiers from OPAL, we can't * service interrupts anymore anyway */ - opal_notifier_disable(); + opal_event_shutdown(); /* Soft disable interrupts */ local_irq_disable(); -- cgit v1.2.3 From e059b105d157d0231e2f0a7fba996724d856114b Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:54 +1000 Subject: powerpc: Add MSI operations to pci_controller_ops struct Add MSI setup and teardown functions to pci_controller_ops. Patch the callsites (arch_{setup,teardown}_msi_irqs) to prefer the controller ops version if it's available. 
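The dispatch itself is a simple preference check; roughly the following, a trimmed sketch of what the msi.c hunk below adds (the real code also keeps the existing multi-MSI capability check):

int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct pci_controller *phb = pci_bus_to_host(dev->bus);

	/* Prefer the per-controller hook, fall back to ppc_md */
	if (phb->controller_ops.setup_msi_irqs)
		return phb->controller_ops.setup_msi_irqs(dev, nvec, type);
	if (ppc_md.setup_msi_irqs)
		return ppc_md.setup_msi_irqs(dev, nvec, type);

	return -ENOSYS;
}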
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 6 ++++++ arch/powerpc/kernel/msi.c | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 1811c44bf34b..a3b6252bcfc9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -30,6 +30,12 @@ struct pci_controller_ops { /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); void (*reset_secondary_bus)(struct pci_dev *dev); + +#ifdef CONFIG_PCI_MSI + int (*setup_msi_irqs)(struct pci_dev *dev, + int nvec, int type); + void (*teardown_msi_irqs)(struct pci_dev *dev); +#endif }; /* diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 71bd161640cf..3d452f71fa25 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -15,7 +15,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) { + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if ((!phb->controller_ops.setup_msi_irqs || + !phb->controller_ops.teardown_msi_irqs) && + (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs)) { pr_debug("msi: Platform doesn't provide MSI callbacks.\n"); return -ENOSYS; } @@ -24,10 +28,18 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - return ppc_md.setup_msi_irqs(dev, nvec, type); + if (phb->controller_ops.setup_msi_irqs) + return phb->controller_ops.setup_msi_irqs(dev, nvec, type); + else + return ppc_md.setup_msi_irqs(dev, nvec, type); } void arch_teardown_msi_irqs(struct pci_dev *dev) { - ppc_md.teardown_msi_irqs(dev); + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if (phb->controller_ops.teardown_msi_irqs) + phb->controller_ops.teardown_msi_irqs(dev); + else + ppc_md.teardown_msi_irqs(dev); } -- cgit v1.2.3 From d6381119a415316c5602710cf7fbf388d15d6d3c Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:55 +1000 Subject: powerpc/powernv: Move MSI-related ops to pci_controller_ops Move the PowerNV/BML platform to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. 
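On the platform side this amounts to filling in the new pci_controller_ops members; for a hypothetical platform the shape would be roughly the following (the example_* names are invented, the actual PowerNV hunk follows):

struct pci_controller_ops example_pci_controller_ops = {
	.dma_dev_setup		= example_pci_dma_dev_setup,
#ifdef CONFIG_PCI_MSI
	.setup_msi_irqs		= example_setup_msi_irqs,
	.teardown_msi_irqs	= example_teardown_msi_irqs,
#endif
};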
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index bca2aeb6e4b6..0f04940b7fb0 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -768,16 +768,14 @@ void __init pnv_pci_init(void) ppc_md.tce_free_rm = pnv_tce_free_rm; ppc_md.tce_get = pnv_tce_get; set_pci_dma_ops(&dma_iommu_ops); - - /* Configure MSIs */ -#ifdef CONFIG_PCI_MSI - ppc_md.setup_msi_irqs = pnv_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pnv_teardown_msi_irqs; -#endif } machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); struct pci_controller_ops pnv_pci_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif }; -- cgit v1.2.3 From 7e3d6c5a4be45769a9d439a4b62ad85cfe9e6754 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:56 +1000 Subject: powerpc/cell: Move MSI-related ops to pci_controller_ops Move the Cell platform to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. We can be confident that the functions will be added to the platform's ops struct before any PCI controller's ops struct is populated because: 1) These ops are added to the struct in a subsys initcall. We populate the ops in axon_msi_probe, which is the probe call for the axon-msi driver. However, the driver is registered in axon_msi_init, which is a subsys initcall, so this will happen at the subsys level. 2) The controller receives the struct later, in a device initcall. Cell populates the controller in cell_setup_phb, which is hooked up to ppc_md.pci_setup_phb. ppc_md.pci_setup_phb is only ever called in of_platform.c, as part of the OpenFirmware PCI driver's probe routine. That driver is registered in a device initcall, so it will occur *after* the struct is properly populated. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/axon_msi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 623bd961465a..d9b8a443458f 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -22,6 +22,7 @@ #include #include +#include "cell.h" /* * MSIC registers, specified as offsets from dcr_base @@ -406,8 +407,8 @@ static int axon_msi_probe(struct platform_device *device) dev_set_drvdata(&device->dev, msic); - ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs; + cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs; axon_msi_debug_setup(dn, msic); -- cgit v1.2.3 From 9f6b8029787bb37170d4535e9fc09158f634282c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 16:20:07 +0200 Subject: KVM: use kvm_memslots whenever possible kvm_memslots provides lockdep checking. Use it consistently instead of explicit dereferencing of kvm->memslots.
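The mechanical pattern applied in every hunk below is, schematically (slot_id is a stand-in for whatever index the caller already holds):

/* Before: raw dereference of kvm->memslots, no locking assertion. */
memslot = id_to_memslot(kvm->memslots, slot_id);

/* After: kvm_memslots() wraps the dereference and can have lockdep
 * verify that kvm->srcu is read-held or kvm->slots_lock is taken. */
slots = kvm_memslots(kvm);
memslot = id_to_memslot(slots, slot_id);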
Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- arch/arm/kvm/mmu.c | 3 ++- arch/mips/kvm/mips.c | 4 +++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 8 ++++++-- arch/powerpc/kvm/book3s_pr.c | 4 +++- arch/s390/kvm/kvm-s390.c | 4 +++- arch/x86/kvm/x86.c | 4 +++- virt/kvm/kvm_main.c | 16 ++++++++++------ 8 files changed, 31 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1d5accbd3dcf..6f0f8f3ac7df 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1155,7 +1155,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) */ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) { - struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index a8e660a44474..bc5ddd973b44 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -968,6 +968,7 @@ out: /* Get (and clear) the dirty memory log for a memory slot. */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; unsigned long ga, ga_end; int is_dirty = 0; @@ -982,7 +983,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 1a4acf8bf4f4..dab68b7af3f2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -650,7 +650,7 @@ static void kvmppc_rmap_reset(struct kvm *kvm) int srcu_idx; srcu_idx = srcu_read_lock(&kvm->srcu); - slots = kvm->memslots; + slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { /* * This assumes it is acceptable to lose reference and diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index df81caab7383..6aff5a990492 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2321,6 +2321,7 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, struct kvm_dirty_log *log) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r; unsigned long n; @@ -2331,7 +2332,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -2384,6 +2386,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; if (npages && old->npages) { @@ -2393,7 +2396,8 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, * since the rmap array starts out as all zeroes, * i.e. no pages are dirty. 
*/ - memslot = id_to_memslot(kvm->memslots, mem->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, mem->slot); kvmppc_hv_get_dirty_log(kvm, memslot, NULL); } } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index f57383941d03..c01dfc798c66 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1530,6 +1530,7 @@ out: static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, struct kvm_dirty_log *log) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; struct kvm_vcpu *vcpu; ulong ga, ga_end; @@ -1545,7 +1546,8 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); ga = memslot->base_gfn << PAGE_SHIFT; ga_end = ga + (memslot->npages << PAGE_SHIFT); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d461f8a15c07..a05107e9b2bf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -236,6 +236,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, { int r; unsigned long n; + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -245,7 +246,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d42d8ace90f1..8918e23e0e8e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7782,6 +7782,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { + struct kvm_memslots *slots; struct kvm_memory_slot *new; int nr_mmu_pages = 0; @@ -7803,7 +7804,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); /* It's OK to get 'new' slot here as it has already been installed */ - new = id_to_memslot(kvm->memslots, mem->slot); + slots = kvm_memslots(kvm); + new = id_to_memslot(slots, mem->slot); /* * Dirty logging tracks sptes in 4k granularity, meaning that large diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e299763ef744..42df724071c0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -734,7 +734,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) static struct kvm_memslots *install_new_memslots(struct kvm *kvm, struct kvm_memslots *slots) { - struct kvm_memslots *old_memslots = kvm->memslots; + struct kvm_memslots *old_memslots = kvm_memslots(kvm); /* * Set the low bit in the generation, which disables SPTE caching @@ -799,7 +799,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - slot = id_to_memslot(kvm->memslots, mem->slot); + slot = id_to_memslot(kvm_memslots(kvm), mem->slot); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -842,7 +842,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ r = -EEXIST; - kvm_for_each_memslot(slot, kvm->memslots) { + kvm_for_each_memslot(slot, kvm_memslots(kvm)) { if ((slot->id >= KVM_USER_MEM_SLOTS) || (slot->id == mem->slot)) continue; @@ -873,7 +873,7 @@ int 
__kvm_set_memory_region(struct kvm *kvm, slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); + memcpy(slots, kvm_memslots(kvm), sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { slot = id_to_memslot(slots, mem->slot); @@ -966,6 +966,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r, i; unsigned long n; @@ -975,7 +976,8 @@ int kvm_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -1024,6 +1026,7 @@ EXPORT_SYMBOL_GPL(kvm_get_dirty_log); int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log, bool *is_dirty) { + struct kvm_memslots *slots; struct kvm_memory_slot *memslot; int r, i; unsigned long n; @@ -1034,7 +1037,8 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, if (log->slot >= KVM_USER_MEM_SLOTS) goto out; - memslot = id_to_memslot(kvm->memslots, log->slot); + slots = kvm_memslots(kvm); + memslot = id_to_memslot(slots, log->slot); dirty_bitmap = memslot->dirty_bitmap; r = -ENOENT; -- cgit v1.2.3 From 09170a49422bd786be3eac5cec1955257c5a34b7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 May 2015 13:59:39 +0200 Subject: KVM: const-ify uses of struct kvm_userspace_memory_region Architecture-specific helpers are not supposed to muck with struct kvm_userspace_memory_region contents. Add const to enforce this. In order to eliminate the only write in __kvm_set_memory_region, the cleaning of deleted slots is pulled up from update_memslots to __kvm_set_memory_region. 
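Schematically, the write that the new const forbids is replaced by clearing the kernel's private copy of the slot; the sketch below only mirrors the kvm_main.c hunk further down, it does not add anything new:

/* Old: wrote through the caller's pointer (now rejected by const):
 *	if (!npages)
 *		mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 * New: scrub the internal struct kvm_memory_slot copy instead. */
if (!npages) {
	if (!old.npages)
		goto out;		/* deleting a non-existent slot */
	change = KVM_MR_DELETE;
	new.base_gfn = 0;
	new.flags = 0;
}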
Reviewed-by: Takuya Yoshikawa Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- arch/arm/kvm/mmu.c | 4 ++-- arch/mips/kvm/mips.c | 4 ++-- arch/powerpc/include/asm/kvm_ppc.h | 8 ++++---- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/kvm/book3s_pr.c | 4 ++-- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 4 ++-- arch/s390/kvm/kvm-s390.c | 4 ++-- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 8 ++++---- virt/kvm/kvm_main.c | 22 +++++++++++----------- 12 files changed, 37 insertions(+), 37 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 6f0f8f3ac7df..b5c023a37aec 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1719,7 +1719,7 @@ out: } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change) { @@ -1734,7 +1734,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { hva_t hva = mem->userspace_addr; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index bc5ddd973b44..5963e2e8a6d7 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -198,14 +198,14 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { return 0; } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change) { diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b8475daad884..aff563b5f001 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -182,9 +182,9 @@ extern int kvmppc_core_create_memslot(struct kvm *kvm, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); @@ -243,9 +243,9 @@ struct kvmppc_ops { void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot); int (*prepare_memory_region)(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); void (*commit_memory_region)(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old); int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 453a8a47a467..60aa0726dccc 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -757,13 +757,13 @@ void kvmppc_core_flush_memslot(struct kvm 
*kvm, struct kvm_memory_slot *memslot) int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old) { kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6aff5a990492..ed493d123268 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2376,13 +2376,13 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return 0; } static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c01dfc798c66..0873e766df1b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1573,13 +1573,13 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return 0; } static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old) { return; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3872ab31c80a..518e3a8b351f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1784,13 +1784,13 @@ int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { return 0; } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8cd1f80fdc70..5985bb2a332b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -595,14 +595,14 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a05107e9b2bf..994f9c37f25f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2582,7 +2582,7 @@ int kvm_arch_create_memslot(struct 
kvm *kvm, struct kvm_memory_slot *slot, /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { /* A few sanity checks. We can have memory slots which have to be @@ -2600,7 +2600,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8918e23e0e8e..30854ea218e7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7700,7 +7700,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm) int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { /* @@ -7778,7 +7778,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, } void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87fd74a04005..fbced7015ebd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -501,9 +501,9 @@ enum kvm_mr_change { }; int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + const struct kvm_userspace_memory_region *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, @@ -511,10 +511,10 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, + const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, enum kvm_mr_change change); bool kvm_largepages_enabled(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 42df724071c0..fc2dbe1c34fc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -676,8 +676,6 @@ static void update_memslots(struct kvm_memslots *slots, WARN_ON(mslots[i].id != id); if (!new->npages) { WARN_ON(!mslots[i].npages); - new->base_gfn = 0; - new->flags = 0; if (mslots[i].npages) slots->used_slots--; } else { @@ -717,7 +715,7 @@ static void update_memslots(struct kvm_memslots *slots, slots->id_to_index[mslots[i].id] = i; } -static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) +static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem) { u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; @@ -767,7 +765,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, * Must be called holding kvm->slots_lock for write. 
*/ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -806,9 +804,6 @@ int __kvm_set_memory_region(struct kvm *kvm, if (npages > KVM_MEM_MAX_NR_PAGES) goto out; - if (!npages) - mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; - new = old = *slot; new.id = mem->slot; @@ -834,10 +829,14 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; } } - } else if (old.npages) { + } else { + if (!old.npages) + goto out; + change = KVM_MR_DELETE; - } else /* Modify a non-existent slot: disallowed. */ - goto out; + new.base_gfn = 0; + new.flags = 0; + } if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ @@ -944,7 +943,7 @@ out: EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + const struct kvm_userspace_memory_region *mem) { int r; @@ -960,6 +959,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; + return kvm_set_memory_region(kvm, mem); } -- cgit v1.2.3 From 15f46015ee17681b542432df21747f5c51857156 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 21:26:08 +0200 Subject: KVM: add memslots argument to kvm_arch_memslots_updated Prepare for the case of multiple address spaces. Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- arch/arm/kvm/mmu.c | 2 +- arch/mips/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 2 +- include/linux/kvm_types.h | 1 + virt/kvm/kvm_main.c | 2 +- 8 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index b5c023a37aec..e9ac084d21ea 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1839,7 +1839,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) { } diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 4c25823563fe..e8c8d9d0c45f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -839,7 +839,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a193a13cf08b..d91f65b28e32 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -698,7 +698,7 @@ struct kvm_vcpu_arch { static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void 
kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 444c412307cb..3024acbe1f9d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -636,7 +636,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 30854ea218e7..f0aec85f8cdd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7689,7 +7689,7 @@ out_free: return -ENOMEM; } -void kvm_arch_memslots_updated(struct kvm *kvm) +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) { /* * memslots->generation has been incremented. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index fbced7015ebd..8815f1dffb77 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -508,7 +508,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages); -void kvm_arch_memslots_updated(struct kvm *kvm); +void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 931da7e917cf..1b47a185c2f0 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -28,6 +28,7 @@ struct kvm_run; struct kvm_userspace_memory_region; struct kvm_vcpu; struct kvm_vcpu_init; +struct kvm_memslots; enum kvm_mr_change; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fc2dbe1c34fc..4361204a6348 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -751,7 +751,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, */ slots->generation++; - kvm_arch_memslots_updated(kvm); + kvm_arch_memslots_updated(kvm, slots); return old_memslots; } -- cgit v1.2.3 From 06931e62246844c73fba24d7aeb4a5dc897a2739 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 26 May 2015 15:11:28 +0200 Subject: sched/topology: Rename topology_thread_cpumask() to topology_sibling_cpumask() Rename topology_thread_cpumask() to topology_sibling_cpumask() for more consistency with scheduler code. Signed-off-by: Bartosz Golaszewski Reviewed-by: Thomas Gleixner Acked-by: Russell King Acked-by: Catalin Marinas Cc: Benoit Cousson Cc: Fenghua Yu Cc: Guenter Roeck Cc: Jean Delvare Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Oleg Drokin Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Russell King Cc: Viresh Kumar Link: http://lkml.kernel.org/r/1432645896-12588-2-git-send-email-bgolaszewski@baylibre.com Signed-off-by: Ingo Molnar --- Documentation/cputopology.txt | 2 +- arch/arm/include/asm/topology.h | 2 +- arch/arm64/include/asm/topology.h | 2 +- arch/ia64/include/asm/topology.h | 2 +- arch/mips/include/asm/topology.h | 2 +- arch/powerpc/include/asm/topology.h | 2 +- arch/powerpc/mm/tlb_nohash.c | 2 +- arch/s390/include/asm/topology.h | 3 ++- arch/sparc/include/asm/topology_64.h | 2 +- arch/tile/include/asm/topology.h | 2 +- arch/x86/include/asm/topology.h | 2 +- arch/x86/kernel/cpu/perf_event_intel.c | 6 +++--- block/blk-mq-cpumap.c | 2 +- drivers/acpi/acpi_pad.c | 2 +- drivers/base/topology.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c | 2 +- drivers/staging/lustre/lustre/ptlrpc/service.c | 4 ++-- include/linux/topology.h | 6 +++--- lib/cpu_rmap.c | 2 +- 20 files changed, 26 insertions(+), 25 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 0aad6deb2d96..428a961ff063 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -44,7 +44,7 @@ these macros in include/asm-XXX/topology.h: #define topology_physical_package_id(cpu) #define topology_core_id(cpu) #define topology_book_id(cpu) -#define topology_thread_cpumask(cpu) +#define topology_sibling_cpumask(cpu) #define topology_core_cpumask(cpu) #define topology_book_cpumask(cpu) diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 2fe85fff5cca..370f7a732900 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -18,7 +18,7 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) -#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 7ebcd31ce51c..225ec3524fbf 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -18,7 +18,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) -#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 6437ca21f61b..3ad8f6988363 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -53,7 +53,7 @@ void build_cpu_to_node_map(void); #define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) #define topology_core_id(cpu) (cpu_data(cpu)->core_id) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif extern void arch_fix_phys_package_id(int num, u32 slot); 
diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h index 3e307ec2afba..7afda4150a59 100644 --- a/arch/mips/include/asm/topology.h +++ b/arch/mips/include/asm/topology.h @@ -15,7 +15,7 @@ #define topology_physical_package_id(cpu) (cpu_data[cpu].package) #define topology_core_id(cpu) (cpu_data[cpu].core) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&cpu_sibling_map[cpu]) +#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) #endif #endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 5f1048eaa5b6..8b3b46b7b0f2 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -87,7 +87,7 @@ static inline int prrn_is_enabled(void) #include #define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu)) -#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index cbd3d069897f..723a099f6be3 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -217,7 +217,7 @@ static DEFINE_RAW_SPINLOCK(tlbivax_lock); static int mm_is_core_local(struct mm_struct *mm) { return cpumask_subset(mm_cpumask(mm), - topology_thread_cpumask(smp_processor_id())); + topology_sibling_cpumask(smp_processor_id())); } struct tlb_flush_param { diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index b1453a2ae1ca..4990f6c66288 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -22,7 +22,8 @@ DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology); #define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id) #define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask) +#define topology_sibling_cpumask(cpu) \ + (&per_cpu(cpu_topology, cpu).thread_mask) #define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id) #define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask) #define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id) diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index ed8f071132e4..9a928fcb7a9b 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -41,7 +41,7 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) -#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 938311844233..76b0d0ebb244 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -55,7 +55,7 @@ static inline const struct cpumask *cpumask_of_node(int node) #define topology_physical_package_id(cpu) ((void)(cpu), 0) #define topology_core_id(cpu) (cpu) #define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask) -#define 
topology_thread_cpumask(cpu) cpumask_of(cpu) +#define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #endif /* _ASM_TILE_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0e8f04f2c26f..5a77593fdace 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -124,7 +124,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) -#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3998131d1a68..324817735771 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2621,7 +2621,7 @@ static void intel_pmu_cpu_starting(int cpu) if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; - for_each_cpu(i, topology_thread_cpumask(cpu)) { + for_each_cpu(i, topology_sibling_cpumask(cpu)) { struct intel_shared_regs *pc; pc = per_cpu(cpu_hw_events, i).shared_regs; @@ -2641,7 +2641,7 @@ static void intel_pmu_cpu_starting(int cpu) if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { int h = x86_pmu.num_counters >> 1; - for_each_cpu(i, topology_thread_cpumask(cpu)) { + for_each_cpu(i, topology_sibling_cpumask(cpu)) { struct intel_excl_cntrs *c; c = per_cpu(cpu_hw_events, i).excl_cntrs; @@ -3403,7 +3403,7 @@ static __init int fixup_ht_bug(void) if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_thread_cpumask(cpu)); + w = cpumask_weight(topology_sibling_cpumask(cpu)); if (w > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 5f13f4d0bcce..1e28ddb656b8 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -24,7 +24,7 @@ static int get_first_sibling(unsigned int cpu) { unsigned int ret; - ret = cpumask_first(topology_thread_cpumask(cpu)); + ret = cpumask_first(topology_sibling_cpumask(cpu)); if (ret < nr_cpu_ids) return ret; diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 6bc9cbc01ad6..00b39802d7ec 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -105,7 +105,7 @@ static void round_robin_cpu(unsigned int tsk_index) mutex_lock(&round_robin_lock); cpumask_clear(tmp); for_each_cpu(cpu, pad_busy_cpus) - cpumask_or(tmp, tmp, topology_thread_cpumask(cpu)); + cpumask_or(tmp, tmp, topology_sibling_cpumask(cpu)); cpumask_andnot(tmp, cpu_online_mask, tmp); /* avoid HT sibilings if possible */ if (cpumask_empty(tmp)) diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 6491f45200a7..8b7d7f8e5851 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -61,7 +61,7 @@ static DEVICE_ATTR_RO(physical_package_id); define_id_show_func(core_id); static DEVICE_ATTR_RO(core_id); -define_siblings_show_func(thread_siblings, thread_cpumask); +define_siblings_show_func(thread_siblings, sibling_cpumask); static DEVICE_ATTR_RO(thread_siblings); static DEVICE_ATTR_RO(thread_siblings_list); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 4b00545a3ace..65944dd8bf6b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1304,7 +1304,7 @@ static unsigned int 
efx_wanted_parallelism(struct efx_nic *efx) if (!cpumask_test_cpu(cpu, thread_mask)) { ++count; cpumask_or(thread_mask, thread_mask, - topology_thread_cpumask(cpu)); + topology_sibling_cpumask(cpu)); } } diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c index cc3ab351943e..f9262243f935 100644 --- a/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c +++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-cpu.c @@ -87,7 +87,7 @@ static void cfs_cpu_core_siblings(int cpu, cpumask_t *mask) /* return cpumask of HTs in the same core */ static void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask) { - cpumask_copy(mask, topology_thread_cpumask(cpu)); + cpumask_copy(mask, topology_sibling_cpumask(cpu)); } static void cfs_node_to_cpumask(int node, cpumask_t *mask) diff --git a/drivers/staging/lustre/lustre/ptlrpc/service.c b/drivers/staging/lustre/lustre/ptlrpc/service.c index 8e61421515cb..344189ac5698 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/service.c +++ b/drivers/staging/lustre/lustre/ptlrpc/service.c @@ -557,7 +557,7 @@ ptlrpc_server_nthreads_check(struct ptlrpc_service *svc, * there are. */ /* weight is # of HTs */ - if (cpumask_weight(topology_thread_cpumask(0)) > 1) { + if (cpumask_weight(topology_sibling_cpumask(0)) > 1) { /* depress thread factor for hyper-thread */ factor = factor - (factor >> 1) + (factor >> 3); } @@ -2768,7 +2768,7 @@ int ptlrpc_hr_init(void) init_waitqueue_head(&ptlrpc_hr.hr_waitq); - weight = cpumask_weight(topology_thread_cpumask(0)); + weight = cpumask_weight(topology_sibling_cpumask(0)); cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) { hrp->hrp_cpt = i; diff --git a/include/linux/topology.h b/include/linux/topology.h index 909b6e43b694..73ddad1e0fa3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -191,8 +191,8 @@ static inline int cpu_to_mem(int cpu) #ifndef topology_core_id #define topology_core_id(cpu) ((void)(cpu), 0) #endif -#ifndef topology_thread_cpumask -#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#ifndef topology_sibling_cpumask +#define topology_sibling_cpumask(cpu) cpumask_of(cpu) #endif #ifndef topology_core_cpumask #define topology_core_cpumask(cpu) cpumask_of(cpu) @@ -201,7 +201,7 @@ static inline int cpu_to_mem(int cpu) #ifdef CONFIG_SCHED_SMT static inline const struct cpumask *cpu_smt_mask(int cpu) { - return topology_thread_cpumask(cpu); + return topology_sibling_cpumask(cpu); } #endif diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 4f134d8907a7..f610b2a10b3e 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -191,7 +191,7 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index, /* Update distances based on topology */ for_each_cpu(cpu, update_mask) { if (cpu_rmap_copy_neigh(rmap, cpu, - topology_thread_cpumask(cpu), 1)) + topology_sibling_cpumask(cpu), 1)) continue; if (cpu_rmap_copy_neigh(rmap, cpu, topology_core_cpumask(cpu), 2)) -- cgit v1.2.3 From a76ff6884bfedfafcbb0d9c84c7a6b6a546cba6d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Apr 2015 08:19:59 -0700 Subject: powerpc: Fix smp_mb__before_spinlock() Currently, smp_mb__before_spinlock() is defined to be smp_wmb() in core code, but this is not sufficient on PowerPC. This patch therefore supplies an override for the generic definition to strengthen smp_mb__before_spinlock() to smp_mb(), as is needed on PowerPC. Signed-off-by: Paul E. 
McKenney Cc: --- arch/powerpc/include/asm/barrier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index a3bf5be111ff..1124f59b8df4 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -89,5 +89,6 @@ do { \ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() +#define smp_mb__before_spinlock() smp_mb() #endif /* _ASM_POWERPC_BARRIER_H */ -- cgit v1.2.3 From f36f3f2846b5578d62910ee0b6dbef59fdd1cfa4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 May 2015 13:20:23 +0200 Subject: KVM: add "new" argument to kvm_arch_commit_memory_region This lets the function access the new memory slot without going through kvm_memslots and id_to_memslot. It will simplify the code when more than one address space will be supported. Unfortunately, the "const"ness of the new argument must be cast away in two places. Fixing KVM to accept const struct kvm_memory_slot pointers would require modifications in pretty much all architectures, and is left for later. Reviewed-by: Radim Krcmar Signed-off-by: Paolo Bonzini --- arch/arm/kvm/mmu.c | 1 + arch/mips/kvm/mips.c | 1 + arch/powerpc/include/asm/kvm_ppc.h | 6 ++++-- arch/powerpc/kvm/book3s.c | 5 +++-- arch/powerpc/kvm/book3s_hv.c | 3 ++- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/booke.c | 3 ++- arch/powerpc/kvm/powerpc.c | 3 ++- arch/s390/kvm/kvm-s390.c | 1 + arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 6 ++++-- arch/x86/kvm/x86.c | 13 +++++-------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 2 +- 14 files changed, 30 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index e9ac084d21ea..7f473e6d3bf5 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1721,6 +1721,7 @@ out: void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { /* diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 5963e2e8a6d7..cd4c129ce743 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -207,6 +207,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { unsigned long npages = 0; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index aff563b5f001..c6ef05bd0765 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -185,7 +185,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old); + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -246,7 +247,8 @@ struct kvmppc_ops { const struct kvm_userspace_memory_region *mem); void (*commit_memory_region)(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old); + const struct kvm_memory_slot *old, + const struct kvm_memory_slot
*new); int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 60aa0726dccc..05ea8fc7f829 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -764,9 +764,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { - kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ed493d123268..68d067ad4222 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2383,7 +2383,8 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; struct kvm_memslots *slots; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 0873e766df1b..64891b081ad5 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1580,7 +1580,8 @@ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { return; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 518e3a8b351f..cc5842657161 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1791,7 +1791,8 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old) + const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 5985bb2a332b..e5dde32fe71f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -604,9 +604,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { - kvmppc_core_commit_memory_region(kvm, mem, old); + kvmppc_core_commit_memory_region(kvm, mem, old, new); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 994f9c37f25f..8ad4b9a5667f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2602,6 +2602,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { int rc; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1a4d6a054749..7276107b35df 100644 --- a/arch/x86/include/asm/kvm_host.h +++ 
b/arch/x86/include/asm/kvm_host.h @@ -874,7 +874,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, struct kvm_memory_slot *memslot); void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - struct kvm_memory_slot *memslot); + const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot); void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 49c34e632b91..1bf2ae9ca521 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4621,10 +4621,12 @@ restart: } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, - struct kvm_memory_slot *memslot) + const struct kvm_memory_slot *memslot) { + /* FIXME: const-ify all uses of struct kvm_memory_slot. */ spin_lock(&kvm->mmu_lock); - slot_handle_leaf(kvm, memslot, kvm_mmu_zap_collapsible_spte, true); + slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot, + kvm_mmu_zap_collapsible_spte, true); spin_unlock(&kvm->mmu_lock); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f0aec85f8cdd..ba7b0cc52fed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7780,13 +7780,12 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change) { - struct kvm_memslots *slots; - struct kvm_memory_slot *new; int nr_mmu_pages = 0; - if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { + if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) { int ret; ret = vm_munmap(old->userspace_addr, @@ -7803,10 +7802,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (nr_mmu_pages) kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); - /* It's OK to get 'new' slot here as it has already been installed */ - slots = kvm_memslots(kvm); - new = id_to_memslot(slots, mem->slot); - /* * Dirty logging tracks sptes in 4k granularity, meaning that large * sptes have to be split. If live migration is successful, the guest @@ -7831,9 +7826,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * been zapped so no dirty logging staff is needed for old slot. For * KVM_MR_FLAGS_ONLY, the old slot is essentially the same one as the * new and it's also covered when dealing with the new slot. + * + * FIXME: const-ify all uses of struct kvm_memory_slot. 
*/ if (change != KVM_MR_DELETE) - kvm_mmu_slot_apply_flags(kvm, new); + kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new); } void kvm_arch_flush_shadow_all(struct kvm *kvm) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8815f1dffb77..9bd3bc16be87 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -516,6 +516,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, enum kvm_mr_change change); bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4361204a6348..9f67c942d8ee 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -912,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm, update_memslots(slots, &new); old_memslots = install_new_memslots(kvm, slots); - kvm_arch_commit_memory_region(kvm, mem, &old, change); + kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); kvm_free_memslot(kvm, &old, &new); kvfree(old_memslots); -- cgit v1.2.3 From b01aec9b2c7d32f17a37553df63efa9f7c0fdaa0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 21 May 2015 19:59:31 +0200 Subject: EDAC: Cleanup atomic_scrub mess So first of all, this atomic_scrub() function's naming is bad. It looks like an atomic_t helper. Change it to edac_atomic_scrub(). The bigger problem is that this function is arch-specific and every new arch which doesn't necessarily need that functionality still needs to define it, otherwise EDAC doesn't compile. So instead of doing that and including arch-specific headers, have each arch define an EDAC_ATOMIC_SCRUB symbol which can be used in edac_mc.c for ifdeffery. Much cleaner. And we already are doing this with another symbol - EDAC_SUPPORT. This is also much cleaner than having CONFIG_EDAC enumerate all the arches which need/have EDAC support and drivers. This way I can kill the useless edac.h header in tile too. Acked-by: Ralf Baechle Acked-by: Michael Ellerman Acked-by: Chris Metcalf Acked-by: Ingo Molnar Acked-by: Russell King Cc: Benjamin Herrenschmidt Cc: Doug Thompson Cc: linux-arm-kernel@lists.infradead.org Cc: linux-edac@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: linuxppc-dev@lists.ozlabs.org Cc: "Maciej W. Rozycki" Cc: Markos Chandras Cc: Mauro Carvalho Chehab Cc: Paul Mackerras Cc: "Steven J. 
Hill" Cc: x86@kernel.org Signed-off-by: Borislav Petkov --- arch/arm/Kconfig | 2 ++ arch/arm/include/asm/edac.h | 5 +++-- arch/mips/Kconfig | 1 + arch/mips/include/asm/edac.h | 4 ++-- arch/powerpc/Kconfig | 2 ++ arch/powerpc/include/asm/edac.h | 4 ++-- arch/tile/Kconfig | 1 + arch/tile/include/asm/edac.h | 29 ----------------------------- arch/x86/Kconfig | 2 ++ arch/x86/include/asm/edac.h | 2 +- drivers/edac/Kconfig | 7 ++++--- drivers/edac/edac_mc.c | 9 +++++++-- 12 files changed, 27 insertions(+), 41 deletions(-) delete mode 100644 arch/tile/include/asm/edac.h (limited to 'arch/powerpc') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b12..325d6f3a596a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -15,6 +15,8 @@ config ARM select CLONE_BACKWARDS select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS + select EDAC_SUPPORT + select EDAC_ATOMIC_SCRUB select GENERIC_ALLOCATOR select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI) select GENERIC_CLOCKEVENTS_BROADCAST if SMP diff --git a/arch/arm/include/asm/edac.h b/arch/arm/include/asm/edac.h index 0df7a2c1fc3d..5189fa819b60 100644 --- a/arch/arm/include/asm/edac.h +++ b/arch/arm/include/asm/edac.h @@ -18,11 +18,12 @@ #define ASM_EDAC_H /* * ECC atomic, DMA, SMP and interrupt safe scrub function. - * Implements the per arch atomic_scrub() that EDAC use for software + * Implements the per arch edac_atomic_scrub() that EDAC use for software * ECC scrubbing. It reads memory and then writes back the original * value, allowing the hardware to detect and correct memory errors. */ -static inline void atomic_scrub(void *va, u32 size) + +static inline void edac_atomic_scrub(void *va, u32 size) { #if __LINUX_ARM_ARCH__ >= 6 unsigned int *virt_addr = va; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f5016656494f..b65edf514b40 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -819,6 +819,7 @@ config CAVIUM_OCTEON_SOC select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select EDAC_SUPPORT + select EDAC_ATOMIC_SCRUB select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_HOTPLUG_CPU if CPU_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h index 94105d3f58f4..980b16527374 100644 --- a/arch/mips/include/asm/edac.h +++ b/arch/mips/include/asm/edac.h @@ -5,7 +5,7 @@ /* ECC atomic, DMA, SMP and interrupt safe scrub function */ -static inline void atomic_scrub(void *va, u32 size) +static inline void edac_atomic_scrub(void *va, u32 size) { unsigned long *virt_addr = va; unsigned long temp; @@ -21,7 +21,7 @@ static inline void atomic_scrub(void *va, u32 size) __asm__ __volatile__ ( " .set mips2 \n" - "1: ll %0, %1 # atomic_scrub \n" + "1: ll %0, %1 # edac_atomic_scrub \n" " addu %0, $0 \n" " sc %0, %1 \n" " beqz %0, 1b \n" diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 190cc48abc0c..5ef27113b898 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -153,6 +153,8 @@ config PPC select NO_BOOTMEM select HAVE_GENERIC_RCU_GUP select HAVE_PERF_EVENTS_NMI if PPC64 + select EDAC_SUPPORT + select EDAC_ATOMIC_SCRUB config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/edac.h b/arch/powerpc/include/asm/edac.h index 6ead88bbfbb8..5571e23d253e 100644 --- a/arch/powerpc/include/asm/edac.h +++ b/arch/powerpc/include/asm/edac.h @@ -12,11 +12,11 @@ #define ASM_EDAC_H /* * ECC atomic, DMA, SMP and interrupt safe scrub function. 
- * Implements the per arch atomic_scrub() that EDAC use for software + * Implements the per arch edac_atomic_scrub() that EDAC use for software * ECC scrubbing. It reads memory and then writes back the original * value, allowing the hardware to detect and correct memory errors. */ -static __inline__ void atomic_scrub(void *va, u32 size) +static __inline__ void edac_atomic_scrub(void *va, u32 size) { unsigned int *virt_addr = va; unsigned int temp; diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index a07e31b50d3f..59cf0b911898 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -28,6 +28,7 @@ config TILE select HAVE_DEBUG_STACKOVERFLOW select ARCH_WANT_FRAME_POINTERS select HAVE_CONTEXT_TRACKING + select EDAC_SUPPORT # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h deleted file mode 100644 index 87fc83eeaffd..000000000000 --- a/arch/tile/include/asm/edac.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_EDAC_H -#define _ASM_TILE_EDAC_H - -/* ECC atomic, DMA, SMP and interrupt safe scrub function */ - -static inline void atomic_scrub(void *va, u32 size) -{ - /* - * These is nothing to be done here because CE is - * corrected by the mshim. - */ - return; -} - -#endif /* _ASM_TILE_EDAC_H */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 226d5696e1d1..482c160a9fe9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -143,6 +143,8 @@ config X86 select ACPI_LEGACY_TABLES_LOOKUP if ACPI select X86_FEATURE_NAMES if PROC_FS select SRCU + select EDAC_SUPPORT + select EDAC_ATOMIC_SCRUB config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h index e9b57ecc70c5..cf8fdf83b231 100644 --- a/arch/x86/include/asm/edac.h +++ b/arch/x86/include/asm/edac.h @@ -3,7 +3,7 @@ /* ECC atomic, DMA, SMP and interrupt safe scrub function */ -static inline void atomic_scrub(void *va, u32 size) +static inline void edac_atomic_scrub(void *va, u32 size) { u32 i, *virt_addr = va; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2d2530cdf99d..a90e06ac1631 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -2,15 +2,16 @@ # EDAC Kconfig # Copyright (c) 2008 Doug Thompson www.softwarebitmaker.com # Licensed and distributed under the GPL -# + +config EDAC_ATOMIC_SCRUB + bool config EDAC_SUPPORT bool menuconfig EDAC bool "EDAC (Error Detection And Correction) reporting" - depends on HAS_IOMEM - depends on X86 || PPC || TILE || ARM || EDAC_SUPPORT + depends on HAS_IOMEM && EDAC_SUPPORT help EDAC is designed to report errors in the core system. 
These are low-level errors that are reported in the CPU or diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index af3be1914dbb..943ed8cf71b9 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -30,11 +30,16 @@ #include #include #include -#include #include "edac_core.h" #include "edac_module.h" #include +#ifdef CONFIG_EDAC_ATOMIC_SCRUB +#include +#else +#define edac_atomic_scrub(va, size) do { } while (0) +#endif + /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -874,7 +879,7 @@ static void edac_mc_scrub_block(unsigned long page, unsigned long offset, virt_addr = kmap_atomic(pg); /* Perform architecture specific atomic scrub operation */ - atomic_scrub(virt_addr + offset, size); + edac_atomic_scrub(virt_addr + offset, size); /* Unmap and complete */ kunmap_atomic(virt_addr); -- cgit v1.2.3 From d9fbe003ead7205cfd6e623f90f3eb9a56a0d445 Mon Sep 17 00:00:00 2001 From: Yangbo Lu Date: Fri, 15 May 2015 10:46:02 +0800 Subject: powerpc/dts: add eSDHC compatible list Add eSDHC compatible list for P2041/P3041/P4080/P5020/P5040. Signed-off-by: Yangbo Lu Acked-by: Scott Wood Signed-off-by: Ulf Hansson --- arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 1 + arch/powerpc/boot/dts/fsl/p3041si-post.dtsi | 1 + arch/powerpc/boot/dts/fsl/p4080si-post.dtsi | 1 + arch/powerpc/boot/dts/fsl/p5020si-post.dtsi | 1 + arch/powerpc/boot/dts/fsl/p5040si-post.dtsi | 1 + 5 files changed, 5 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index f2feacfd9a25..b6a0e88ee5ce 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -370,6 +370,7 @@ /include/ "qoriq-esdhc-0.dtsi" sdhc@114000 { + compatible = "fsl,p2041-esdhc", "fsl,esdhc"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ sdhci,auto-cmd12; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi index d6fea37395ad..cf18f7bf824f 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi @@ -397,6 +397,7 @@ /include/ "qoriq-esdhc-0.dtsi" sdhc@114000 { + compatible = "fsl,p3041-esdhc", "fsl,esdhc"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ sdhci,auto-cmd12; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 89482c9b2301..90431c0b53ad 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -469,6 +469,7 @@ /include/ "qoriq-esdhc-0.dtsi" sdhc@114000 { + compatible = "fsl,p4080-esdhc", "fsl,esdhc"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ voltage-ranges = <3300 3300>; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi index 6e04851e2fc9..8be61d11349e 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi @@ -384,6 +384,7 @@ /include/ "qoriq-esdhc-0.dtsi" sdhc@114000 { + compatible = "fsl,p5020-esdhc", "fsl,esdhc"; fsl,iommu-parent = <&pamu1>; fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ sdhci,auto-cmd12; diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi index 5e44dfa1e1a5..48e232f2d50d 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi +++ 
b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -362,6 +362,7 @@ /include/ "qoriq-esdhc-0.dtsi" sdhc@114000 { + compatible = "fsl,p5040-esdhc", "fsl,esdhc"; fsl,iommu-parent = <&pamu2>; fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ sdhci,auto-cmd12; -- cgit v1.2.3 From 1d14b8755f0a7d8110f0bdc5b74987f8cc96c18e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:57 +1000 Subject: powerpc/pseries: Move MSI-related ops to pci_controller_ops Move the pseries platform to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations We need to iterate all PHBs because the MSI setup happens later than find_and_init_phbs() - mpe. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/msi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index c8d24f9a6948..c22bb647cce6 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -18,6 +18,8 @@ #include #include +#include "pseries.h" + static int query_token, change_token; #define RTAS_QUERY_FN 0 @@ -505,6 +507,8 @@ static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) static int rtas_msi_init(void) { + struct pci_controller *phb; + query_token = rtas_token("ibm,query-interrupt-source-number"); change_token = rtas_token("ibm,change-msi"); @@ -516,9 +520,15 @@ static int rtas_msi_init(void) pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = rtas_setup_msi_irqs; - ppc_md.teardown_msi_irqs = rtas_teardown_msi_irqs; + WARN_ON(pseries_pci_controller_ops.setup_msi_irqs); + pseries_pci_controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; + pseries_pci_controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; + + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = rtas_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = rtas_teardown_msi_irqs; + } WARN_ON(ppc_md.pci_irq_fixup); ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; -- cgit v1.2.3 From 00e25397032f590d0a4d0ee89e236a4d1f8c0580 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:58 +1000 Subject: powerpc/fsl_msi: Move MSI-related ops to pci_controller_ops Move the fsl_msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. Previously, MSI ops were added to ppc_md at the subsys level. However, in fsl_pci.c, PCI controllers are created at the at arch level. So, unlike in e.g. PowerNV/pSeries/Cell, we can't simply populate a platform-level controller ops structure and have it copied into the controllers when they are created. Instead, walk every phb, and attempt to populate it with the MSI ops. 
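The conversion pattern shared by this and the following MSI patches is small. As a rough sketch (foo_setup_msi_irqs/foo_teardown_msi_irqs are placeholders for whichever driver is being converted; hose_list, pci_controller and controller_ops are the existing powerpc PCI structures used throughout the series):

	struct pci_controller *phb;

	list_for_each_entry(phb, &hose_list, list_node) {
		/* Warn only where a single MSI backend is expected per platform. */
		WARN_ON(phb->controller_ops.setup_msi_irqs);
		phb->controller_ops.setup_msi_irqs = foo_setup_msi_irqs;
		phb->controller_ops.teardown_msi_irqs = foo_teardown_msi_irqs;
	}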
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/fsl_msi.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index f086c6f22dc9..5236e5427c38 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -405,6 +405,7 @@ static int fsl_of_msi_probe(struct platform_device *dev) const struct fsl_msi_feature *features; int len; u32 offset; + struct pci_controller *phb; match = of_match_device(fsl_of_msi_ids, &dev->dev); if (!match) @@ -541,14 +542,20 @@ static int fsl_of_msi_probe(struct platform_device *dev) list_add_tail(&msi->list, &msi_head); - /* The multiple setting ppc_md.setup_msi_irqs will not harm things */ - if (!ppc_md.setup_msi_irqs) { - ppc_md.setup_msi_irqs = fsl_setup_msi_irqs; - ppc_md.teardown_msi_irqs = fsl_teardown_msi_irqs; - } else if (ppc_md.setup_msi_irqs != fsl_setup_msi_irqs) { - dev_err(&dev->dev, "Different MSI driver already installed!\n"); - err = -ENODEV; - goto error_out; + /* + * Apply the MSI ops to all the controllers. + * It doesn't hurt to reassign the same ops, + * but bail out if we find another MSI driver. + */ + list_for_each_entry(phb, &hose_list, list_node) { + if (!phb->controller_ops.setup_msi_irqs) { + phb->controller_ops.setup_msi_irqs = fsl_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = fsl_teardown_msi_irqs; + } else if (phb->controller_ops.setup_msi_irqs != fsl_setup_msi_irqs) { + dev_err(&dev->dev, "Different MSI driver already installed!\n"); + err = -ENODEV; + goto error_out; + } } return 0; error_out: -- cgit v1.2.3 From b7eba2ffccb51d7676a4ddc26658687331feb6a3 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:27:59 +1000 Subject: powerpc/ppc4xx_msi: Move MSI-related ops to pci_controller_ops Move the ppc4xx msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. 
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_msi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 6e2e6aa378bb..6eb21f2ea585 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -218,6 +218,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev) struct ppc4xx_msi *msi; struct resource res; int err = 0; + struct pci_controller *phb; dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); @@ -250,8 +251,10 @@ static int ppc4xx_msi_probe(struct platform_device *dev) } ppc4xx_msi = *msi; - ppc_md.setup_msi_irqs = ppc4xx_setup_msi_irqs; - ppc_md.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + phb->controller_ops.setup_msi_irqs = ppc4xx_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = ppc4xx_teardown_msi_irqs; + } return err; error_out: -- cgit v1.2.3 From f2c800aaceb6f26fe78590f971169b1d3d6fe322 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:00 +1000 Subject: powerpc/ppc4xx_hsta_msi: Move MSI-related ops to pci_controller_ops Move the ppc4xx hsta msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index f366d2d4c079..2bc33674ebfc 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -128,6 +128,7 @@ static int hsta_msi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct resource *mem; int irq, ret, irq_count; + struct pci_controller *phb; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (IS_ERR(mem)) { @@ -171,8 +172,10 @@ static int hsta_msi_probe(struct platform_device *pdev) } } - ppc_md.setup_msi_irqs = hsta_setup_msi_irqs; - ppc_md.teardown_msi_irqs = hsta_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs; + } return 0; out2: -- cgit v1.2.3 From 83922966973e19a48b6e59f9fa1259aa790a33c1 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:01 +1000 Subject: powerpc/pasemi: Move MSI-related ops to pci_controller_ops Move the PaSemi MPIC msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. 
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/msi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index 0b3706604543..27f2b187a91b 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -142,6 +142,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) int mpic_pasemi_msi_init(struct mpic *mpic) { int rc; + struct pci_controller *phb; if (!mpic->irqhost->of_node || !of_device_is_compatible(mpic->irqhost->of_node, @@ -157,9 +158,11 @@ int mpic_pasemi_msi_init(struct mpic *mpic) pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); msi_mpic = mpic; - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = pasemi_msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + } return 0; } -- cgit v1.2.3 From 14f95acda2f3238f3470beea0ddbf78081b3268d Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:02 +1000 Subject: powerpc/mpic_u3msi: Move MSI-related ops to pci_controller_ops Move the u3 MPIC msi subsystem to use the pci_controller_ops structure rather than ppc_md for MSI related PCI controller operations. As with fsl_msi, operations are plugged in at the subsys level, after controller creation. Again, we iterate over all controllers and populate them with the MSI ops. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/mpic_u3msi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index b2cef1809389..fc46ef3b816e 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -181,6 +181,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) int mpic_u3msi_init(struct mpic *mpic) { int rc; + struct pci_controller *phb; rc = mpic_msi_init_allocator(mpic); if (rc) { @@ -193,9 +194,11 @@ int mpic_u3msi_init(struct mpic *mpic) BUG_ON(msi_mpic); msi_mpic = mpic; - WARN_ON(ppc_md.setup_msi_irqs); - ppc_md.setup_msi_irqs = u3msi_setup_msi_irqs; - ppc_md.teardown_msi_irqs = u3msi_teardown_msi_irqs; + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = u3msi_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = u3msi_teardown_msi_irqs; + } return 0; } -- cgit v1.2.3 From 1f88d5860e0b8244b28e21b63a521915e5c15313 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 14 Apr 2015 14:28:03 +1000 Subject: powerpc: Remove MSI-related PCI controller ops from ppc_md Remove unneeded ppc_md functions. Patch callsites to use pci_controller_ops functions exclusively. 
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 6 ------ arch/powerpc/kernel/msi.c | 15 ++++----------- 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index ef8899432ae7..0d387d0570cd 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -131,12 +131,6 @@ struct machdep_calls { /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); -#ifdef CONFIG_PCI_MSI - int (*setup_msi_irqs)(struct pci_dev *dev, - int nvec, int type); - void (*teardown_msi_irqs)(struct pci_dev *dev); -#endif - void (*restart)(char *cmd); void (*halt)(void); void (*panic)(char *str); diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index 3d452f71fa25..dab616a33b8d 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -17,9 +17,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - if ((!phb->controller_ops.setup_msi_irqs || - !phb->controller_ops.teardown_msi_irqs) && - (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs)) { + if (!phb->controller_ops.setup_msi_irqs || + !phb->controller_ops.teardown_msi_irqs) { pr_debug("msi: Platform doesn't provide MSI callbacks.\n"); return -ENOSYS; } @@ -28,18 +27,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - if (phb->controller_ops.setup_msi_irqs) - return phb->controller_ops.setup_msi_irqs(dev, nvec, type); - else - return ppc_md.setup_msi_irqs(dev, nvec, type); + return phb->controller_ops.setup_msi_irqs(dev, nvec, type); } void arch_teardown_msi_irqs(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - if (phb->controller_ops.teardown_msi_irqs) - phb->controller_ops.teardown_msi_irqs(dev); - else - ppc_md.teardown_msi_irqs(dev); + phb->controller_ops.teardown_msi_irqs(dev); } -- cgit v1.2.3 From 92ae03532619dc24fdb7a5ae8ea63785fbd39f86 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 28 Apr 2015 15:12:05 +1000 Subject: powerpc/powernv: Specialise pci_controller_ops for each controller type Remove powernv generic PCI controller operations. Replace it with controller ops for each of the two supported PHBs. 
As an added bonus, make the two new structs const, which will help guard against bugs such as the one introduced in 65ebf4b63 ("powerpc/powernv: Move controller ops from ppc_md to controller_ops") Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 16 ++++++++++++---- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 10 +++++++++- arch/powerpc/platforms/powernv/pci.c | 14 +++----------- arch/powerpc/platforms/powernv/pci.h | 4 ++++ arch/powerpc/platforms/powernv/powernv.h | 2 -- 5 files changed, 28 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f8bc950efcae..23125f46f865 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2648,6 +2648,17 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) OPAL_ASSERT_RESET); } +static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif + .enable_device_hook = pnv_pci_enable_device_hook, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, +}; + static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { @@ -2808,10 +2819,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, * the child P2P bridges) can form individual PE. */ ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; - pnv_pci_controller_ops.enable_device_hook = pnv_pci_enable_device_hook; - pnv_pci_controller_ops.window_alignment = pnv_pci_window_alignment; - pnv_pci_controller_ops.reset_secondary_bus = pnv_pci_reset_secondary_bus; - hose->controller_ops = pnv_pci_controller_ops; + hose->controller_ops = pnv_pci_ioda_controller_ops; #ifdef CONFIG_PCI_IOV ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 4729ca793813..7f826e8cc1d4 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -95,6 +95,14 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); } +static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_setup_msi_irqs, + .teardown_msi_irqs = pnv_teardown_msi_irqs, +#endif +}; + static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, void *tce_mem, u64 tce_size) { @@ -133,7 +141,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; - phb->hose->controller_ops = pnv_pci_controller_ops; + phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops; phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 0f04940b7fb0..dfad2ba13bdf 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -45,7 +45,7 @@ //#define cfg_dbg(fmt...) 
printk(fmt) #ifdef CONFIG_PCI_MSI -static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -94,7 +94,7 @@ static int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -static void pnv_teardown_msi_irqs(struct pci_dev *pdev) +void pnv_teardown_msi_irqs(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -662,7 +662,7 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl, tbl->it_type = TCE_PCI; } -static void pnv_pci_dma_dev_setup(struct pci_dev *pdev) +void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; @@ -771,11 +771,3 @@ void __init pnv_pci_init(void) } machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); - -struct pci_controller_ops pnv_pci_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, -#endif -}; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 070ee888fc95..15fde52cb5cd 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -218,4 +218,8 @@ extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); +extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); +extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); + #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index f907f0a494da..addd15cdbabc 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -29,8 +29,6 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) } #endif -extern struct pci_controller_ops pnv_pci_controller_ops; - extern u32 pnv_get_supported_cpuidle_states(void); extern void pnv_lpc_init(void); -- cgit v1.2.3 From 3405c2570fd68fc5ccc703c8de9c23abf5e95819 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 28 Apr 2015 15:12:06 +1000 Subject: powerpc/pci: add dma_set_mask to pci_controller_ops Some systems only need to deal with DMA masks for PCI devices. For these systems, we can avoid the need for a platform hook and instead use a pci controller based hook. 
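A platform that wants to handle DMA masks for its PCI devices now just fills in the new member when it builds its controller ops; a minimal sketch, with the foo_* names as hypothetical placeholders rather than an existing platform:

	static int foo_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
	{
		/* Platform-specific validation of dma_mask would go here. */
		return __dma_set_mask(&pdev->dev, dma_mask);
	}

	static const struct pci_controller_ops foo_pci_controller_ops = {
		.dma_set_mask = foo_pci_dma_set_mask,
	};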
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/dma.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index a3b6252bcfc9..6d17bb8498bf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -36,6 +36,8 @@ struct pci_controller_ops { int nvec, int type); void (*teardown_msi_irqs)(struct pci_dev *dev); #endif + + int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask); }; /* diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 484b2d4462c1..35e4dcc5dce3 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -248,6 +248,14 @@ int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) return ppc_md.dma_set_mask(dev, dma_mask); + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_controller *phb = pci_bus_to_host(pdev->bus); + if (phb->controller_ops.dma_set_mask) + return phb->controller_ops.dma_set_mask(pdev, dma_mask); + } + return __dma_set_mask(dev, dma_mask); } EXPORT_SYMBOL(dma_set_mask); -- cgit v1.2.3 From 763d2d8df1ee2b92ff09cd58f6034021e2cabf6d Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 28 Apr 2015 15:12:07 +1000 Subject: powerpc/powernv: Move dma_set_mask() from pnv_phb to pci_controller_ops Previously, dma_set_mask() on powernv was convoluted: 0) Call dma_set_mask() (a/p/kernel/dma.c) 1) In dma_set_mask(), ppc_md.dma_set_mask() exists, so call it. 2) On powernv, that function pointer is pnv_dma_set_mask(). In pnv_dma_set_mask(), the device is pci, so call pnv_pci_dma_set_mask(). 3) In pnv_pci_dma_set_mask(), call pnv_phb->set_dma_mask() if it exists. 4) It only exists in the ioda case, where it points to pnv_pci_ioda_dma_set_mask(), which is the final function. So the call chain is: dma_set_mask() -> pnv_dma_set_mask() -> pnv_pci_dma_set_mask() -> pnv_pci_ioda_dma_set_mask() Both ppc_md and pnv_phb function pointers are used. Rip out the ppc_md call, pnv_dma_set_mask() and pnv_pci_dma_set_mask(). Instead: 0) Call dma_set_mask() (a/p/kernel/dma.c) 1) In dma_set_mask(), the device is pci, and pci_controller_ops.dma_set_mask() exists, so call pci_controller_ops.dma_set_mask() 2) In the ioda case, that points to pnv_pci_ioda_dma_set_mask(). The new call chain is dma_set_mask() -> pnv_pci_ioda_dma_set_mask() Now only the pci_controller_ops function pointer is used. The fallback paths for p5ioc2 are the same. Previously, pnv_pci_dma_set_mask() would find no pnv_phb->set_dma_mask() function, to it would call __set_dma_mask(). Now, dma_set_mask() finds no ppc_md call or pci_controller_ops call, so it calls __set_dma_mask(). 
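Nothing changes from a driver's point of view; the entry point is still the generic call, e.g. (illustrative fragment only):

	/* Ask for 64-bit DMA, fall back to 32-bit if refused. */
	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
		dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));

Only the dispatch underneath now goes through pci_controller_ops.dma_set_mask(), with __dma_set_mask() as the fallback.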
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 7 ++++--- arch/powerpc/platforms/powernv/pci.c | 10 ---------- arch/powerpc/platforms/powernv/pci.h | 2 -- arch/powerpc/platforms/powernv/powernv.h | 6 ------ arch/powerpc/platforms/powernv/setup.c | 8 -------- 5 files changed, 4 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 23125f46f865..508f530e367b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1601,9 +1601,10 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); } -static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, - struct pci_dev *pdev, u64 dma_mask) +static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); struct pnv_ioda_pe *pe; uint64_t top; @@ -2657,6 +2658,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .enable_device_hook = pnv_pci_enable_device_hook, .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_pci_ioda_dma_set_mask, }; static void __init pnv_pci_init_ioda_phb(struct device_node *np, @@ -2802,7 +2804,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, /* Setup TCEs */ phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; - phb->dma_set_mask = pnv_pci_ioda_dma_set_mask; phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; /* Setup shutdown function for kexec */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index dfad2ba13bdf..8a557b5aabf7 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -689,16 +689,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) phb->dma_dev_setup(phb, pdev); } -int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) -{ - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct pnv_phb *phb = hose->private_data; - - if (phb && phb->dma_set_mask) - return phb->dma_set_mask(phb, pdev, dma_mask); - return __dma_set_mask(&pdev->dev, dma_mask); -} - u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 15fde52cb5cd..ac8686c853e6 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -106,8 +106,6 @@ struct pnv_phb { unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg); void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev); - int (*dma_set_mask)(struct pnv_phb *phb, struct pci_dev *pdev, - u64 dma_mask); u64 (*dma_get_required_mask)(struct pnv_phb *phb, struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index addd15cdbabc..9269e30e4ca0 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -12,17 +12,11 @@ struct pci_dev; #ifdef CONFIG_PCI extern void pnv_pci_init(void); extern void pnv_pci_shutdown(void); -extern int pnv_pci_dma_set_mask(struct 
pci_dev *pdev, u64 dma_mask); extern u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev); #else static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } -static inline int pnv_pci_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) -{ - return -ENODEV; -} - static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev) { return 0; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 15e9337b392c..53737e019ae3 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -165,13 +165,6 @@ static void pnv_progress(char *s, unsigned short hex) { } -static int pnv_dma_set_mask(struct device *dev, u64 dma_mask) -{ - if (dev_is_pci(dev)) - return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask); - return __dma_set_mask(dev, dma_mask); -} - static u64 pnv_dma_get_required_mask(struct device *dev) { if (dev_is_pci(dev)) @@ -321,7 +314,6 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, - .dma_set_mask = pnv_dma_set_mask, .dma_get_required_mask = pnv_dma_get_required_mask, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, -- cgit v1.2.3 From 09f3f326cda6b6e2dfc6471c7f8ac77696c87419 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Jun 2015 21:11:35 +1000 Subject: powerpc/mm: Fix build break with STRICT_MM_TYPECHECKS && DEBUG_PAGEALLOC If both STRICT_MM_TYPECHECKS and DEBUG_PAGEALLOC are enabled, the code in kernel_map_linear_page() is built, and so we fail with: arch/powerpc/mm/hash_utils_64.c:1478:2: error: incompatible type for argument 1 of 'htab_convert_pte_flags' Fix it by using pgprot_val(). Reported-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index fda236f908eb..a04ca922f0db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1475,7 +1475,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); -- cgit v1.2.3 From 72e349f1124a114435e599479c9b8d14bfd1ebcd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:10:24 +1000 Subject: powerpc/perf: Fix book3s kernel to userspace backtraces When we take a PMU exception or a software event we call perf_read_regs(). This overloads regs->result with a boolean that describes if we should use the sampled instruction address register (SIAR) or the regs. If the exception is in kernel, we start with the kernel regs and backtrace through the kernel stack. At this point we switch to the userspace regs and backtrace the user stack with perf_callchain_user(). Unfortunately these regs have not got the perf_read_regs() treatment, so regs->result could be anything. 
If it is non zero, perf_instruction_pointer() decides to use the SIAR, and we get issues like this: 0.11% qemu-system-ppc [kernel.kallsyms] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--52.35%-- 0 | | | |--46.39%-- __hrtimer_start_range_ns | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | | | | |--67.08%-- _raw_spin_lock_irqsave <--- hi mum | | | | | | | --100.00%-- 0x7e714 | | | 0x7e714 Notice the bogus _raw_spin_irqsave when we transition from kernel (system_call) to userspace (0x7e714). We inserted what was in the SIAR. Add a check in regs_use_siar() to check that the regs in question are from a PMU exception. With this fix the backtrace makes sense: 0.47% qemu-system-ppc [kernel.vmlinux] [k] _raw_spin_lock_irqsave | ---_raw_spin_lock_irqsave | |--53.83%-- 0 | | | |--44.73%-- hrtimer_try_to_cancel | | kvmppc_start_thread | | kvmppc_run_core | | kvmppc_vcpu_run_hv | | kvmppc_vcpu_run | | kvm_arch_vcpu_ioctl_run | | kvm_vcpu_ioctl | | do_vfs_ioctl | | sys_ioctl | | system_call | | __ioctl | | 0x7e714 | | 0x7e714 Cc: stable@vger.kernel.org Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 12b638425bb9..d90893b76e7c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -131,7 +131,16 @@ static void pmao_restore_workaround(bool ebb) { } static bool regs_use_siar(struct pt_regs *regs) { - return !!regs->result; + /* + * When we take a performance monitor exception the regs are setup + * using perf_read_regs() which overloads some fields, in particular + * regs->result to tell us whether to use SIAR. + * + * However if the regs are from another exception, eg. a syscall, then + * they have not been setup using perf_read_regs() and so regs->result + * is something random. + */ + return ((TRAP(regs) == 0xf00) && regs->result); } /* -- cgit v1.2.3 From 05b05f28fb3835087b3d4e741fd561b9826fe461 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:46:54 +1000 Subject: powerpc: Relocatable system call no longer uses the LR We had some code to restore the LR in the relocatable system call path back when we used the LR to do an indirect branch. Commit 6a404806dfce ("powerpc: Avoid link stack corruption in MMU on syscall entry path") changed this to use the CTR which is volatile across system calls so does not need restoring. Remove the stale comment and the restore of the LR. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9519e6bdc6d7..cf8c1b55e6c8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -59,14 +59,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ #if defined(CONFIG_RELOCATABLE) /* - * We can't branch directly; in the direct case we use LR - * and system_call_entry restores LR. (We thus need to move - * LR to r10 in the RFID case too.) + * We can't branch directly so we do it via the CTR which + * is volatile across system calls. 
*/ #define SYSCALL_PSERIES_2_DIRECT \ mflr r10 ; \ ld r12,PACAKBASE(r13) ; \ - LOAD_HANDLER(r12, system_call_entry_direct) ; \ + LOAD_HANDLER(r12, system_call_entry) ; \ mtctr r12 ; \ mfspr r12,SPRN_SRR1 ; \ /* Re-use of r13... No spare regs to do this */ \ @@ -80,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ mfspr r12,SPRN_SRR1 ; \ li r10,MSR_RI ; \ mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_entry_direct ; + b system_call_entry ; #endif /* @@ -969,13 +968,6 @@ hv_facility_unavailable_relon_trampoline: __end_interrupts: .align 7 -system_call_entry_direct: -#if defined(CONFIG_RELOCATABLE) - /* The first level prologue may have used LR to get here, saving - * orig in r10. To save hacking/ifdeffing common code, restore here. - */ - mtlr r10 -#endif system_call_entry: b system_call_common -- cgit v1.2.3 From d20be433e6a8892ecf59ef62636cd1333975347d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 15:46:55 +1000 Subject: powerpc: Non relocatable system call doesn't need a trampoline We need to use a trampoline when using LOAD_HANDLER(), because the destination needs to be in the first 64kB. An absolute branch has no such limitations, so just jump there. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf8c1b55e6c8..0a0399c2af11 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -79,7 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ mfspr r12,SPRN_SRR1 ; \ li r10,MSR_RI ; \ mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_entry ; + b system_call_common ; #endif /* -- cgit v1.2.3 From c1231a784acc25ec01d35a019533da9ab8593e2e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 20 May 2015 17:59:52 +0800 Subject: powerpc: Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc Use irq_desc_get_xxx() to avoid redundant lookup of irq_desc while we already have a pointer to corresponding irq_desc. 
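In each flow handler the conversion is mechanical; a before/after sketch using the uic case from this patch:

	/* before: re-derives the handler data from the irq number */
	struct uic *uic = irq_get_handler_data(virq);

	/* after: reuses the irq_desc the handler was already given */
	struct uic *uic = irq_desc_get_handler_data(desc);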
Signed-off-by: Jiang Liu Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 2 +- arch/powerpc/platforms/cell/axon_msi.c | 2 +- arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 2 +- arch/powerpc/sysdev/uic.c | 2 +- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index c949ca055712..63016621aff8 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -193,7 +193,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc) { - struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq); + struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); int sub_virq; u32 status; diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index d9b8a443458f..fe51de4fcf13 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -96,7 +96,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct axon_msic *msic = irq_get_handler_data(irq); + struct axon_msic *msic = irq_desc_get_handler_data(desc); u32 write_offset, msi; int idx; int retry = 0; diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index c269caee58f9..9dd154d6f89a 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -124,7 +124,7 @@ static void hlwd_pic_irq_cascade(unsigned int cascade_virq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - struct irq_domain *irq_domain = irq_get_handler_data(cascade_virq); + struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); unsigned int virq; raw_spin_lock(&desc->lock); diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 1cd057f11725..d77345338671 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -198,7 +198,7 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_data *idata = irq_desc_get_irq_data(desc); - struct uic *uic = irq_get_handler_data(virq); + struct uic *uic = irq_desc_get_handler_data(desc); u32 msr; int src; int subvirq; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 5bc5889d4acc..08c248eb491b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -227,7 +227,7 @@ void xics_migrate_irqs_away(void) /* Locate interrupt server */ server = -1; - ics = irq_get_chip_data(virq); + ics = irq_desc_get_chip_data(desc); if (ics) server = ics->get_server(ics, irq); if (server < 0) { -- cgit v1.2.3 From a1c97df2787f6542257f1fd85e6a9b055e1125be Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 26 May 2015 11:36:56 +1000 Subject: powerpc/configs: Merge pseries_defconfig and pseries_le_defconfig These two configs should be identical with the exception of big or little endian. The big endian version has XMON_DEFAULT turned on while the little endian has XMON_DEFAULT not set. 
It makes the most sense for defconfigs not to use xmon by default, production systems should get back up as quickly as possible, not sit in xmon. In the event debugging is required, the option can be enabled or xmon=on can be specified on commandline. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/configs/pseries_defconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index c2e39f66b182..4da826004ef8 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -297,7 +297,6 @@ CONFIG_CODE_PATCHING_SELFTEST=y CONFIG_FTR_FIXUP_SELFTEST=y CONFIG_MSI_BITMAP_SELFTEST=y CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -- cgit v1.2.3 From ea4d1a87e6dee541a525c822b0b5da160940fc67 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 26 May 2015 11:36:57 +1000 Subject: powerpc/configs: Replace pseries_le_defconfig with a Makefile target using merge_config Rather than continuing to maintain a copy of pseries_defconfig with CONFIG_CPU_LITTLE_ENDIAN enabled, use the generic merge_config script and use an le.config to enable little endian on top of pseries_defconfig without the need for a duplicated _defconfig file. This method will require less maintenance in the future and will ensure that both 'defconfigs' are always in sync. It is worth noting that the seemingly more simple approach of: pseries_le_defconfig: pseries_defconfig $(Q)$(MAKE) le.config Will not work when building using O=builddir. The obvious fix to that: pseries_le_defconfig: $(Q)$(MAKE) -f $(srctree)/Makefile pseries_defconfig le.config Also does not work. This is because if we have for example: config FOO depends on CPU_BIG_ENDIAN select BAR Then BAR will be enabled by the first call to kconfig (via pseries_defconfig), and then will remain enabled after we merge le.config, even though FOO will have been turned off. The solution is to ensure to only invoke the kconfig logic once, after we have merged all the config fragments. This ensures nothing is select'ed on that should then be disabled by the later merged configs. This is done through the explicit call to make olddefconfig Signed-off-by: Cyril Bur Reviewed-by: Samuel Mendoza-Jonas [mpe: Massage change log, fix white space and use ARCH not SRCARCH] Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 15 ++ arch/powerpc/configs/le.config | 1 + arch/powerpc/configs/pseries_le_defconfig | 319 ------------------------------ 3 files changed, 16 insertions(+), 319 deletions(-) create mode 100644 arch/powerpc/configs/le.config delete mode 100644 arch/powerpc/configs/pseries_le_defconfig (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a314cb024c8b..edf39cebfed8 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -269,6 +269,21 @@ bootwrapper_install: %.dtb: scripts $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@) +# Used to create 'merged defconfigs' +# To use it $(call) it with the first argument as the base defconfig +# and the second argument as a space separated list of .config files to merge, +# without the .config suffix. 
+define merge_into_defconfig + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ + -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig +endef + +PHONY += pseries_le_defconfig +pseries_le_defconfig: + $(call merge_into_defconfig,pseries_defconfig,le) + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config new file mode 100644 index 000000000000..ee43fdb3b8f4 --- /dev/null +++ b/arch/powerpc/configs/le.config @@ -0,0 +1 @@ +CONFIG_CPU_LITTLE_ENDIAN=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig deleted file mode 100644 index 09bc96e792cd..000000000000 --- a/arch/powerpc/configs/pseries_le_defconfig +++ /dev/null @@ -1,319 +0,0 @@ -CONFIG_PPC64=y -CONFIG_SMP=y -CONFIG_NR_CPUS=2048 -CONFIG_CPU_LITTLE_ENDIAN=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_FHANDLE=y -CONFIG_AUDIT=y -CONFIG_AUDITSYSCALL=y -CONFIG_IRQ_DOMAIN_DEBUG=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NUMA_BALANCING=y -CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y -CONFIG_CGROUPS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_CGROUP_PERF=y -CONFIG_CGROUP_SCHED=y -CONFIG_USER_NS=y -CONFIG_BLK_DEV_INITRD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_PPC_SPLPAR=y -CONFIG_SCANLOG=m -CONFIG_PPC_SMLPAR=y -CONFIG_DTL=y -# CONFIG_PPC_PMAC is not set -CONFIG_RTAS_FLASH=m -CONFIG_IBMEBUS=y -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y -CONFIG_HZ_100=y -CONFIG_BINFMT_MISC=m -CONFIG_PPC_TRANSACTIONAL_MEM=y -CONFIG_KEXEC=y -CONFIG_IRQ_ALL_CPUS=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_PPC_64K_PAGES=y -CONFIG_PPC_SUBPAGE_PROT=y -CONFIG_SCHED_SMT=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_RPA=m -CONFIG_HOTPLUG_PCI_RPA_DLPAR=m -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_NET_IPIP=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_ADVANCED is not set -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_DEVTMPFS_MOUNT=y -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_BLK_DEV_FD=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_VIRTIO_BLK=m -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AMD74XX=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_CXGB3_ISCSI=m -CONFIG_SCSI_CXGB4_ISCSI=m -CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_BE2ISCSI=m -CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_IBMVSCSI=y -CONFIG_SCSI_IBMVFC=m -CONFIG_SCSI_SYM53C8XX_2=y 
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 -CONFIG_SCSI_IPR=y -CONFIG_SCSI_QLA_FC=m -CONFIG_SCSI_QLA_ISCSI=m -CONFIG_SCSI_LPFC=m -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_ATA=y -CONFIG_SATA_AHCI=y -# CONFIG_ATA_SFF is not set -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_MD_RAID0=y -CONFIG_MD_RAID1=y -CONFIG_MD_RAID10=m -CONFIG_MD_RAID456=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_MIRROR=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_UEVENT=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_VXLAN=m -CONFIG_NETCONSOLE=y -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_VHOST_NET=m -CONFIG_VORTEX=y -CONFIG_ACENIC=m -CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_PCNET32=y -CONFIG_TIGON3=y -CONFIG_CHELSIO_T1=m -CONFIG_BE2NET=m -CONFIG_S2IO=m -CONFIG_IBMVETH=y -CONFIG_EHEA=y -CONFIG_E100=y -CONFIG_E1000=y -CONFIG_E1000E=y -CONFIG_IXGB=m -CONFIG_IXGBE=m -CONFIG_MLX4_EN=m -CONFIG_MYRI10GE=m -CONFIG_QLGE=m -CONFIG_NETXEN_NIC=m -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_EVDEV=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_PCSPKR=m -# CONFIG_SERIO_SERPORT is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_ICOM=m -CONFIG_SERIAL_JSM=m -CONFIG_HVC_CONSOLE=y -CONFIG_HVC_RTAS=y -CONFIG_HVCS=m -CONFIG_VIRTIO_CONSOLE=m -CONFIG_IBM_BSR=m -CONFIG_GEN_RTC=y -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=1024 -CONFIG_FB=y -CONFIG_FIRMWARE_EDID=y -CONFIG_FB_OF=y -CONFIG_FB_MATROX=y -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G=y -CONFIG_FB_RADEON=y -CONFIG_FB_IBM_GXT4500=y -CONFIG_LCD_PLATFORM=m -# CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -CONFIG_HID_GYRATION=y -CONFIG_HID_PANTHERLORD=y -CONFIG_HID_PETALYNX=y -CONFIG_HID_SAMSUNG=y -CONFIG_HID_SUNPLUS=y -CONFIG_USB_HIDDEV=y -CONFIG_USB=y -CONFIG_USB_MON=m -CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_HCD_PPC_OF is not set -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=m -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_MAD=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_EHCA=m -CONFIG_INFINIBAND_CXGB3=m -CONFIG_INFINIBAND_CXGB4=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_INFINIBAND_IPOIB=m -CONFIG_INFINIBAND_IPOIB_CM=y -CONFIG_INFINIBAND_SRP=m -CONFIG_INFINIBAND_ISER=m -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_XFS_FS=m -CONFIG_XFS_POSIX_ACL=y -CONFIG_BTRFS_FS=m -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=m -CONFIG_OVERLAY_FS=m -CONFIG_ISO9660_FS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_LZO=y 
-CONFIG_SQUASHFS_XZ=y -CONFIG_PSTORE=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_CIFS=m -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACK_USAGE=y -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_CODE_PATCHING_SELFTEST=y -CONFIG_FTR_FIXUP_SELFTEST=y -CONFIG_MSI_BITMAP_SELFTEST=y -CONFIG_XMON=y -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_LZO=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_VIRTUALIZATION=y -CONFIG_KVM_BOOK3S_64=m -CONFIG_KVM_BOOK3S_64_HV=m -- cgit v1.2.3 From 24bbd929e6b9e62afd263c42b4318d3b603c956c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Jun 2015 13:40:31 +0200 Subject: of/fdt: split off FDT self reservation from memreserve processing This splits off the reservation of the memory occupied by the FDT binary itself from the processing of the memory reservations it contains. This is necessary because the physical address of the FDT, which is needed to perform the reservation, may not be known to the FDT driver core, i.e., it may be mapped outside the linear direct mapping, in which case __pa() returns a bogus value. Cc: Russell King Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Rob Herring Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm/mm/init.c | 1 + arch/arm64/mm/init.c | 1 + arch/powerpc/kernel/prom.c | 1 + drivers/of/fdt.c | 19 ++++++++++++++----- include/linux/of_fdt.h | 2 ++ 5 files changed, 19 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index be92fa0f2f35..8a63b4cdc0f2 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -268,6 +268,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); + early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 597831bdddf3..89a05f467ffb 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -170,6 +170,7 @@ void __init arm64_memblock_init(void) memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); #endif + early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* 4GB maximum for 32-bit only capable devices */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 308c5e15676b..51ea36f79307 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -573,6 +573,7 @@ static void __init early_reserve_mem_dt(void) int len; const __be32 *prop; + early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); dt_root = of_get_flat_dt_root(); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index cde35c5d0191..f2dd23a32267 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -580,11 +580,6 @@ void __init early_init_fdt_scan_reserved_mem(void) if (!initial_boot_params) return; - /* 
Reserve the dtb region */ - early_init_dt_reserve_memory_arch(__pa(initial_boot_params), - fdt_totalsize(initial_boot_params), - 0); - /* Process header /memreserve/ fields */ for (n = 0; ; n++) { fdt_get_mem_rsv(initial_boot_params, n, &base, &size); @@ -597,6 +592,20 @@ void __init early_init_fdt_scan_reserved_mem(void) fdt_init_reserved_mem(); } +/** + * early_init_fdt_reserve_self() - reserve the memory used by the FDT blob + */ +void __init early_init_fdt_reserve_self(void) +{ + if (!initial_boot_params) + return; + + /* Reserve the dtb region */ + early_init_dt_reserve_memory_arch(__pa(initial_boot_params), + fdt_totalsize(initial_boot_params), + 0); +} + /** * of_scan_flat_dt - scan flattened tree blob and call callback on each. * @it: callback function diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 587ee507965d..fd627a58068f 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -64,6 +64,7 @@ extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, extern int early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data); extern void early_init_fdt_scan_reserved_mem(void); +extern void early_init_fdt_reserve_self(void); extern void early_init_dt_add_memory_arch(u64 base, u64 size); extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, bool no_map); @@ -91,6 +92,7 @@ extern u64 fdt_translate_address(const void *blob, int node_offset); extern void of_fdt_limit_memory(int limit); #else /* CONFIG_OF_FLATTREE */ static inline void early_init_fdt_scan_reserved_mem(void) {} +static inline void early_init_fdt_reserve_self(void) {} static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } static inline void unflatten_device_tree(void) {} static inline void unflatten_and_copy_device_tree(void) {} -- cgit v1.2.3 From fb326e9841b2000a8d1b115ccd7f5b3b61a4d123 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Mon, 12 Jan 2015 08:03:57 +0200 Subject: powerpc/dts: Unify B4 mux nodes Signed-off-by: Igal Liberman Change-Id: Ic5f28f7b492b708f00a5ff74dda723ce5e1da0ba Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/b4420si-post.dtsi | 15 ++------------- arch/powerpc/boot/dts/fsl/b4860si-post.dtsi | 15 ++------------- arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 12 ++++++++++++ 3 files changed, 16 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi index 86161ae6c966..1ea8602e4345 100644 --- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi @@ -80,20 +80,9 @@ compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0"; }; -/include/ "qoriq-clockgen2.dtsi" global-utilities@e1000 { - compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0"; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-2.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, - <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll0-div4", - "pll1", "pll1-div2", "pll1-div4"; - clock-output-names = "cmux0"; - }; + compatible = "fsl,b4420-clockgen", "fsl,b4-clockgen", + "fsl,qoriq-clockgen-2.0"; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index f35e9e0a5445..68b9a0536485 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -182,20 +182,9 
@@ compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0"; }; -/include/ "qoriq-clockgen2.dtsi" global-utilities@e1000 { - compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0"; - - mux0: mux0@0 { - #clock-cells = <0>; - reg = <0x0 0x4>; - compatible = "fsl,qoriq-core-mux-2.0"; - clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, - <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll0-div4", - "pll1", "pll1-div2", "pll1-div4"; - clock-output-names = "cmux0"; - }; + compatible = "fsl,b4860-clockgen", "fsl,b4-clockgen", + "fsl,qoriq-clockgen-2.0"; }; rcpm: global-utilities@e2000 { diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index 73136c0029d2..c6bab55bb065 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -296,9 +296,21 @@ fsl,liodn-bits = <12>; }; +/include/ "qoriq-clockgen2.dtsi" clockgen: global-utilities@e1000 { compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0"; reg = <0xe1000 0x1000>; + + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 0x4>; + compatible = "fsl,qoriq-core-mux-2.0"; + clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, + <&pll1 0>, <&pll1 1>, <&pll1 2>; + clock-names = "pll0", "pll0-div2", "pll0-div4", + "pll1", "pll1-div2", "pll1-div4"; + clock-output-names = "cmux0"; + }; }; rcpm: global-utilities@e2000 { -- cgit v1.2.3 From c89ca8ab7457d891abdffca7e6f5e74cef670d6d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 2 Apr 2015 22:14:11 -0500 Subject: powerpc/e6500: Optimize hugepage TLB misses Some workloads take a lot of TLB misses despite using traditional hugepages. Handle these TLB misses in the asm fastpath rather than going through a bunch of C code. With this patch I measured around a 5x speedup in handling hugepage TLB misses. Signed-off-by: Scott Wood --- arch/powerpc/mm/tlb_low_64e.S | 51 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 89bf95bd63b1..765b419883f2 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */ + bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 ldx r14,r14,r15 /* Grab pmd entry */ mfspr r10,SPRN_MAS0 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 /* Now we build the MAS for a 2M indirect page: * @@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) clrrdi r15,r16,21 /* make EA 2M-aligned */ mtspr SPRN_MAS2,r15 +tlb_miss_huge_done_e6500: lbz r15,TCD_ESEL_NEXT(r11) lbz r16,TCD_ESEL_MAX(r11) lbz r14,TCD_ESEL_FIRST(r11) @@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) tlb_epilog_bolted rfi +tlb_miss_huge_e6500: + beq tlb_miss_fault_e6500 + li r10,1 + andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ + rldimi r14,r10,63,0 /* Set PD_HUGE */ + xor r14,r14,r15 /* Clear size bits */ + ldx r14,0,r14 + + /* + * Now we build the MAS for a huge page. 
+ * + * MAS 0 : ESEL needs to be filled by software round-robin + * - can be handled by indirect code + * MAS 1 : Need to clear IND and set TSIZE + * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler + */ + + subi r15,r15,10 /* Convert psize to tsize */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,~MAS1_IND + rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK + mtspr SPRN_MAS1,r10 + + li r10,-0x400 + sld r15,r10,r15 /* Generate mask based on size */ + and r10,r16,r15 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r10 + andi. r10,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r10,MAS3_SW|MAS3_UW + andc r15,r15,r10 +1: + mtspr SPRN_MAS7_MAS3,r15 + + mfspr r10,SPRN_MAS0 + b tlb_miss_huge_done_e6500 + tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) cmpldi cr1,r15,8 /* Check for vmalloc region */ -- cgit v1.2.3 From 86d63363defc0b2f22288b287f75837d48b561fc Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 6 Apr 2015 22:00:38 -0500 Subject: powerpc/e500mc: Remove dead L2 flushing code in idle_e500.S This code can never be executed as it is only built when CONFIG_PPC_E500MC is unset, but the only CPUs that have CPU_FTR_L2CSR require CONFIG_PPC_E500MC and do not have the MSR/HID0-based nap mechanism that this file uses. Signed-off-by: Scott Wood --- arch/powerpc/kernel/idle_e500.S | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 15448668988d..b9b6ef510be1 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -58,15 +58,6 @@ BEGIN_FTR_SECTION mtlr r0 lis r3,HID0_NAP@h END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) -BEGIN_FTR_SECTION - msync - li r7,L2CSR0_L2FL@l - mtspr SPRN_L2CSR0,r7 -2: - mfspr r7,SPRN_L2CSR0 - andi. 
r4,r7,L2CSR0_L2FL@l - bne 2b -END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP) 1: /* Go to NAP or DOZE now */ mfspr r4,SPRN_HID0 -- cgit v1.2.3 From ec66a97d153306cd35250da419f7f8ead0bc142f Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Wed, 15 Apr 2015 11:48:08 +0800 Subject: powerpc/fsl-booke: Add device tree support for T1024/T1023 SoC The T1024 SoC includes the following functions and features: - Two 64-bit Power architecture e5500 cores, up to 1.4GHz - private 256KB L2 cache each core and shared 256KB CoreNet platform cache (CPC) - 32-/64-bit DDR3L/DDR4 SDRAM memory controller with ECC and interleaving support - Data Path Acceleration Architecture (DPAA) incorporating acceleration - Four MAC for 1G/2.5G/10G network interfaces (RGMII, SGMII, QSGMII, XFI) - High-speed peripheral interfaces - Three PCI Express 2.0 controllers - Additional peripheral interfaces - One SATA 2.0 controller - Two USB 2.0 controllers with integrated PHY - Enhanced secure digital host controller (SD/eSDHC/eMMC) - Enhanced serial peripheral interface (eSPI) - Four I2C controllers - Four 2-pin UARTs or two 4-pin UARTs - Integrated Flash Controller supporting NAND and NOR flash - Two 8-channel DMA engines - Multicore programmable interrupt controller (PIC) - LCD interface (DIU) with 12 bit dual data rate - QUICC Engine block supporting TDM, HDLC, and UART - Deep Sleep power implementation (wakeup from GPIO/Timer/Ethernet/USB) - Support for hardware virtualization and partitioning enforcement - QorIQ Platform's Trust Architecture 2.0 Signed-off-by: Shengzhou Liu [scottwood@freescale.com: whitespace fixes] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 330 ++++++++++++++++++++++++++++ arch/powerpc/boot/dts/fsl/t1024si-post.dtsi | 100 +++++++++ arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi | 87 ++++++++ 3 files changed, 517 insertions(+) create mode 100644 arch/powerpc/boot/dts/fsl/t1023si-post.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/t1024si-post.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi new file mode 100644 index 000000000000..dbe65783cc2d --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -0,0 +1,330 @@ +/* + * T1023 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version.
+ * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +&ifc { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,ifc", "simple-bus"; + interrupts = <25 2 0 0>; +}; + +&pci0 { + compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <20 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <20 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 40 1 0 0 + 0000 0 0 2 &mpic 1 1 0 0 + 0000 0 0 3 &mpic 2 1 0 0 + 0000 0 0 4 &mpic 3 1 0 0 + >; + }; +}; + +&pci1 { + compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0 0xff>; + interrupts = <21 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <21 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 41 1 0 0 + 0000 0 0 2 &mpic 5 1 0 0 + 0000 0 0 3 &mpic 6 1 0 0 + 0000 0 0 4 &mpic 7 1 0 0 + >; + }; +}; + +&pci2 { + compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + interrupts = <22 2 0 0>; + fsl,iommu-parent = <&pamu0>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <22 2 0 0>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 42 1 0 0 + 0000 0 0 2 &mpic 9 1 0 0 + 0000 0 0 3 &mpic 10 1 0 0 + 0000 0 0 4 &mpic 11 1 0 0 + >; + }; +}; + +&dcsr { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,dcsr", "simple-bus"; + + dcsr-epu@0 { + compatible = "fsl,t1023-dcsr-epu", "fsl,dcsr-epu"; + interrupts = <52 2 0 0 + 84 2 0 0 + 85 2 0 0>; + reg = <0x0 0x1000>; + }; + dcsr-npc { + compatible = "fsl,t1023-dcsr-cnpc", "fsl,dcsr-cnpc"; + reg = <0x1000 0x1000 0x1002000 0x10000>; + }; + dcsr-nxc@2000 { + compatible = "fsl,dcsr-nxc"; + reg = <0x2000 0x1000>; + }; + dcsr-corenet { + compatible = "fsl,dcsr-corenet"; + reg = <0x8000 0x1000 0x1A000 0x1000>; + }; + dcsr-ocn@11000 { + compatible = "fsl,t1023-dcsr-ocn", "fsl,dcsr-ocn"; + reg = <0x11000 0x1000>; + }; + dcsr-ddr@12000 { + compatible = "fsl,dcsr-ddr"; + dev-handle = <&ddr1>; + reg = <0x12000 0x1000>; + }; + dcsr-nal@18000 { + compatible = "fsl,t1023-dcsr-nal", "fsl,dcsr-nal"; + reg = <0x18000 0x1000>; + }; + dcsr-rcpm@22000 { + compatible 
= "fsl,t1023-dcsr-rcpm", "fsl,dcsr-rcpm"; + reg = <0x22000 0x1000>; + }; + dcsr-snpc@30000 { + compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x30000 0x1000 0x1022000 0x10000>; + }; + dcsr-snpc@31000 { + compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc"; + reg = <0x31000 0x1000 0x1042000 0x10000>; + }; + dcsr-cpu-sb-proxy@100000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu0>; + reg = <0x100000 0x1000 0x101000 0x1000>; + }; + dcsr-cpu-sb-proxy@108000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu1>; + reg = <0x108000 0x1000 0x109000 0x1000>; + }; +}; + +&soc { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "simple-bus"; + + soc-sram-error { + compatible = "fsl,soc-sram-error"; + interrupts = <16 2 1 29>; + }; + + corenet-law@0 { + compatible = "fsl,corenet-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <16>; + }; + + ddr1: memory-controller@8000 { + compatible = "fsl,qoriq-memory-controller-v5.0", + "fsl,qoriq-memory-controller"; + reg = <0x8000 0x1000>; + interrupts = <16 2 1 23>; + }; + + cpc: l3-cache-controller@10000 { + compatible = "fsl,t1023-l3-cache-controller", "cache"; + reg = <0x10000 0x1000>; + interrupts = <16 2 1 27>; + }; + + corenet-cf@18000 { + compatible = "fsl,corenet2-cf"; + reg = <0x18000 0x1000>; + interrupts = <16 2 1 31>; + }; + + iommu@20000 { + compatible = "fsl,pamu-v1.0", "fsl,pamu"; + reg = <0x20000 0x1000>; + ranges = <0 0x20000 0x1000>; + #address-cells = <1>; + #size-cells = <1>; + interrupts = < + 24 2 0 0 + 16 2 1 30>; + pamu0: pamu@0 { + reg = <0 0x1000>; + fsl,primary-cache-geometry = <128 1>; + fsl,secondary-cache-geometry = <32 2>; + }; + }; + +/include/ "qoriq-mpic.dtsi" + + guts: global-utilities@e0000 { + compatible = "fsl,t1023-device-config", "fsl,qoriq-device-config-2.0"; + reg = <0xe0000 0xe00>; + fsl,has-rstcr; + fsl,liodn-bits = <12>; + }; + +/include/ "qoriq-clockgen2.dtsi" + global-utilities@e1000 { + compatible = "fsl,t1023-clockgen", "fsl,qoriq-clockgen-2.0"; + mux0: mux0@0 { + #clock-cells = <0>; + reg = <0x0 4>; + compatible = "fsl,core-mux-clock"; + clocks = <&pll0 0>, <&pll0 1>; + clock-names = "pll0_0", "pll0_1"; + clock-output-names = "cmux0"; + }; + mux1: mux1@20 { + #clock-cells = <0>; + reg = <0x20 4>; + compatible = "fsl,core-mux-clock"; + clocks = <&pll0 0>, <&pll0 1>; + clock-names = "pll0_0", "pll0_1"; + clock-output-names = "cmux1"; + }; + }; + + rcpm: global-utilities@e2000 { + compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.0"; + reg = <0xe2000 0x1000>; + }; + + sfp: sfp@e8000 { + compatible = "fsl,t1023-sfp"; + reg = <0xe8000 0x1000>; + }; + + serdes: serdes@ea000 { + compatible = "fsl,t1023-serdes"; + reg = <0xea000 0x4000>; + }; + + scfg: global-utilities@fc000 { + compatible = "fsl,t1023-scfg"; + reg = <0xfc000 0x1000>; + }; + +/include/ "elo3-dma-0.dtsi" +/include/ "elo3-dma-1.dtsi" + +/include/ "qoriq-espi-0.dtsi" + spi@110000 { + fsl,espi-num-chipselects = <4>; + }; + +/include/ "qoriq-esdhc-0.dtsi" + sdhc@114000 { + compatible = "fsl,t1023-esdhc", "fsl,esdhc"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */ + sdhci,auto-cmd12; + no-1-8-v; + }; +/include/ "qoriq-i2c-0.dtsi" +/include/ "qoriq-i2c-1.dtsi" +/include/ "qoriq-duart-0.dtsi" +/include/ "qoriq-duart-1.dtsi" +/include/ "qoriq-gpio-0.dtsi" +/include/ "qoriq-gpio-1.dtsi" +/include/ "qoriq-gpio-2.dtsi" +/include/ "qoriq-gpio-3.dtsi" +/include/ "qoriq-usb2-mph-0.dtsi" + usb0: usb@210000 { + 
compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ + phy_type = "utmi"; + port0; + }; +/include/ "qoriq-usb2-dr-0.dtsi" + usb1: usb@211000 { + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ + dr_mode = "host"; + phy_type = "utmi"; + }; +/include/ "qoriq-sata2-0.dtsi" +sata@220000 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ +}; + +/include/ "qoriq-sec5.0-0.dtsi" +}; diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi new file mode 100644 index 000000000000..95e3af8d768e --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi @@ -0,0 +1,100 @@ +/* + * T1024 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/include/ "t1023si-post.dtsi" + +/ { + aliases { + vga = &display; + display = &display; + }; + + qe:qe@ffe140000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "qe"; + compatible = "fsl,qe"; + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + fsl,qe-num-riscs = <1>; + fsl,qe-num-snums = <28>; + brg-frequency = <0>; + bus-frequency = <0>; + }; +}; + +&soc { + display:display@180000 { + compatible = "fsl,t1024-diu", "fsl,diu"; + reg = <0x180000 1000>; + interrupts = <74 2 0 0>; + }; +}; + +&qe { + qeic: interrupt-controller@80 { + interrupt-controller; + compatible = "fsl,qe-ic"; + #address-cells = <0>; + #interrupt-cells = <1>; + reg = <0x80 0x80>; + interrupts = <95 2 0 0 94 2 0 0>; //high:79 low:78 + }; + + ucc@2000 { + cell-index = <1>; + reg = <0x2000 0x200>; + interrupts = <32>; + interrupt-parent = <&qeic>; + }; + + ucc@2200 { + cell-index = <3>; + reg = <0x2200 0x200>; + interrupts = <34>; + interrupt-parent = <&qeic>; + }; + + muram@10000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,qe-muram", "fsl,cpm-muram"; + ranges = <0x0 0x10000 0x6000>; + + data-only@0 { + compatible = "fsl,qe-muram-data", "fsl,cpm-muram-data"; + reg = <0x0 0x6000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi new file mode 100644 index 000000000000..1f1a9f8474d5 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi @@ -0,0 +1,87 @@ +/* + * T1024/T1023 Silicon/SoC Device Tree Source (pre include) + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/dts-v1/; + +/include/ "e5500_power_isa.dtsi" + +/ { + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + aliases { + ccsr = &soc; + dcsr = &dcsr; + + dma0 = &dma0; + dma1 = &dma1; + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + usb0 = &usb0; + usb1 = &usb1; + sdhc = &sdhc; + + crypto = &crypto; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: PowerPC,e5500@0 { + device_type = "cpu"; + reg = <0>; + clocks = <&mux0>; + next-level-cache = <&L2_1>; + L2_1: l2-cache { + next-level-cache = <&cpc>; + }; + }; + cpu1: PowerPC,e5500@1 { + device_type = "cpu"; + reg = <1>; + clocks = <&mux1>; + next-level-cache = <&L2_2>; + L2_2: l2-cache { + next-level-cache = <&cpc>; + }; + }; + }; +}; -- cgit v1.2.3 From 2b6029e2e026771fb4e2add9b38e91f34725813c Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Thu, 9 Apr 2015 16:07:43 +0800 Subject: powerpc/fsl-booke: Add T1024 QDS board support Add support for Freescale T1024/T1023 QorIQ Development System Board. T1024QDS is a high-performance computing evaluation, development and test platform for T1024 QorIQ Power Architecture processor. Signed-off-by: Shengzhou Liu [scottwood: vendor prefix: s/at24/atmel/ and trimmed detailed board description with too-long lines] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 36 ++-- arch/powerpc/boot/dts/t1024qds.dts | 251 ++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/Kconfig | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 4 files changed, 271 insertions(+), 19 deletions(-) create mode 100644 arch/powerpc/boot/dts/t1024qds.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi index dbe65783cc2d..df1f068a5376 100644 --- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi @@ -305,26 +305,26 @@ /include/ "qoriq-gpio-2.dtsi" /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" - usb0: usb@210000 { - compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ - phy_type = "utmi"; - port0; - }; + usb0: usb@210000 { + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ + phy_type = "utmi"; + port0; + }; /include/ "qoriq-usb2-dr-0.dtsi" - usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ - dr_mode = "host"; - phy_type = "utmi"; - }; + usb1: usb@211000 { + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ + dr_mode = "host"; + phy_type = "utmi"; + }; /include/ "qoriq-sata2-0.dtsi" -sata@220000 { - fsl,iommu-parent = <&pamu0>; - fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ -}; + sata@220000 { + fsl,iommu-parent = <&pamu0>; + fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */ + }; /include/ "qoriq-sec5.0-0.dtsi" }; diff --git a/arch/powerpc/boot/dts/t1024qds.dts b/arch/powerpc/boot/dts/t1024qds.dts new file mode 100644 index 000000000000..f31fabb383b9 --- /dev/null +++ b/arch/powerpc/boot/dts/t1024qds.dts @@ -0,0 +1,251 @@ +/* + * T1024 QDS Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t102xsi-pre.dtsi" + +/ { + model = "fsl,T1024QDS"; + compatible = "fsl,T1024QDS"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + board-control@3,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,tetra-fpga", "fsl,fpga-qixis"; + reg = <3 0 0x300>; + ranges = <0 3 0 0x300>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q128a11"; /* 16MB */ + reg = <0>; + spi-max-frequency = <10000000>; + }; + + flash@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "sst,sst25wf040"; /* 512KB */ + reg = <1>; + spi-max-frequency = <10000000>; + }; + + flash@2 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "eon,en25s64"; /* 8MB */ + reg = <2>; + spi-max-frequency = <10000000>; + }; + + slic@2 { + compatible = "maxim,ds26522"; + reg = <2>; + spi-max-frequency = <2000000>; + }; + + slic@3 { + compatible = "maxim,ds26522"; + reg = <3>; + spi-max-frequency = <2000000>; + }; + }; + + i2c@118000 { + pca9547@77 { + compatible = "nxp,pca9547"; + reg = <0x77>; + 
#address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0>; + + eeprom@50 { + compatible = "atmel,24c512"; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "atmel,24c02"; + reg = <0x51>; + }; + + eeprom@57 { + compatible = "atmel,24c02"; + reg = <0x57>; + }; + }; + + i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x2>; + + ina220@40 { + compatible = "ti,ina220"; + reg = <0x40>; + shunt-resistor = <1000>; + }; + + ina220@41 { + compatible = "ti,ina220"; + reg = <0x41>; + shunt-resistor = <1000>; + }; + }; + + i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0x3>; + + adt7461@4c { + /* Thermal Monitor */ + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + + eeprom@55 { + compatible = "atmel,24c02"; + reg = <0x55>; + }; + + eeprom@56 { + compatible = "atmel,24c512"; + reg = <0x56>; + }; + + eeprom@57 { + compatible = "atmel,24c512"; + reg = <0x57>; + }; + }; + }; + rtc@68 { + compatible = "dallas,ds3232"; + reg = <0x68>; + interrupts = <0x5 0x1 0 0>; + }; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/t1024si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 2fb4b24368a6..e628d4001272 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -282,7 +282,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS and T104xQDS/RDB + P5020 DS, P5040 DS, T102x QDS, T104x QDS/RDB endif # FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 9824d2cf79bd..f61f47780486 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -150,6 +150,7 @@ static const char * const boards[] __initconst = { "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", + "fsl,T1024QDS", "fsl,T1040QDS", "fsl,T1042QDS", "fsl,T1040RDB", -- cgit v1.2.3 From 5afe13fd48c72be61342edbc6ede6e60f6153227 Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Thu, 9 Apr 2015 16:07:44 +0800 Subject: powerpc/fsl-booke: Add T1024 RDB board support T1024RDB is a Freescale Reference Design Board that hosts the T1024 SoC. 
Signed-off-by: Shengzhou Liu [scottwood: vendor prefix: s/at24/atmel/ and trimmed detailed board description with too-long lines] Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t1024rdb.dts | 185 ++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/Kconfig | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 3 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/boot/dts/t1024rdb.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/t1024rdb.dts b/arch/powerpc/boot/dts/t1024rdb.dts new file mode 100644 index 000000000000..733e723ffed6 --- /dev/null +++ b/arch/powerpc/boot/dts/t1024rdb.dts @@ -0,0 +1,185 @@ +/* + * T1024 RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/include/ "fsl/t102xsi-pre.dtsi" + +/ { + model = "fsl,T1024RDB"; + compatible = "fsl,T1024RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xff800000 0x00010000 + 3 0 0xf 0xffdf0000 0x00008000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + + board-control@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,t1024-cpld"; + reg = <3 0 0x300>; + ranges = <0 3 0 0x300>; + bank-width = <1>; + device-width = <1>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "micron,n25q512ax3"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clk */ + }; + + slic@1 { + compatible = "maxim,ds26522"; + reg = <1>; + spi-max-frequency = <2000000>; + }; + + slic@2 { + compatible = "maxim,ds26522"; + reg = <2>; + spi-max-frequency = <2000000>; + }; + }; + + i2c@118000 { + adt7461@4c { + /* Thermal Monitor */ + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + + eeprom@50 { + compatible = "atmel,24c256"; + reg = <0x50>; + }; + + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + interrupts = <0x1 0x1 0 0>; + }; + }; + + i2c@118100 { + pca9546@77 { + compatible = "nxp,pca9546"; + reg = <0x77>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/t1024si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index e628d4001272..97915feffd42 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -282,7 +282,7 @@ config CORENET_GENERIC For 64bit kernel, the following boards are supported: T208x QDS/RDB, T4240 QDS/RDB and B4 QDS The following boards are supported for both 32bit and 64bit kernel: - P5020 DS, P5040 DS, T102x QDS, T104x QDS/RDB + P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB endif # FSL_SOC_BOOKE diff --git 
a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index f61f47780486..d7c1b4d4254a 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -151,6 +151,7 @@ static const char * const boards[] __initconst = { "fsl,B4420QDS", "fsl,B4220QDS", "fsl,T1024QDS", + "fsl,T1024RDB", "fsl,T1040QDS", "fsl,T1042QDS", "fsl,T1040RDB", -- cgit v1.2.3 From 65bf2a057021b009ff9924b59203993bdff82ecc Mon Sep 17 00:00:00 2001 From: Shengzhou Liu Date: Thu, 9 Apr 2015 16:07:45 +0800 Subject: powerpc/fsl-booke: Add T1023 RDB board support T1023RDB is a Freescale Reference Design Board that hosts the T1023 SoC. T1023RDB board Overview ----------------------- - T1023 SoC integrating two 64-bit e5500 cores up to 1.4GHz - CoreNet fabric supporting coherent and noncoherent transactions with prioritization and bandwidth allocation - Memory: 2GB Micron MT40A512M8HX unbuffered 32-bit fixed DDR4 without ECC - Accelerator: DPAA components consist of FMan, BMan, QMan, DCE and SEC - Ethernet interfaces: - one 1G RGMII port on-board (RTL8211F PHY) - one 1G SGMII port on-board (RTL8211F PHY) - one 2.5G SGMII port on-board (AQR105 PHY) - PCIe: Two Mini-PCIe connectors on-board. - SerDes: 4 lanes up to 10.3125GHz - NOR: 128MB S29GL01GS110TFIV10 Spansion NOR Flash - NAND: 512MB S34MS04G200BFI000 Spansion NAND Flash - eSPI: 64MB S25FL512SAGMFI010 Spansion SPI flash - USB: one Type-A USB 2.0 port with internal PHY - eSDHC: supports SD/MMC card and eMMC flash on-board - 256Kbit M24256 I2C EEPROM - RTC: Real-time clock DS1339 on I2C bus - UART: one serial port on-board with RJ45 connector - Debugging: JTAG/COP for T1023 debugging Signed-off-by: Shengzhou Liu Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t1023rdb.dts | 151 ++++++++++++++++++++ arch/powerpc/platforms/85xx/corenet_generic.c | 1 + 2 files changed, 152 insertions(+) create mode 100644 arch/powerpc/boot/dts/t1023rdb.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/t1023rdb.dts b/arch/powerpc/boot/dts/t1023rdb.dts new file mode 100644 index 000000000000..06b090aba066 --- /dev/null +++ b/arch/powerpc/boot/dts/t1023rdb.dts @@ -0,0 +1,151 @@ +/* + * T1023 RDB Device Tree Source + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED.
IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/t102xsi-pre.dtsi" + +/ { + model = "fsl,T1023RDB"; + compatible = "fsl,T1023RDB"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + ifc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x2000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 1 0 0xf 0xff800000 0x00010000>; + + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + status = "disabled"; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x8000000>; + bank-width = <2>; + device-width = <1>; + }; + + nand@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + }; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01072000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "spansion,s25fl512s"; + reg = <0>; + spi-max-frequency = <10000000>; /* input clk */ + }; + }; + + i2c@118000 { + eeprom@50 { + compatible = "st,m24256"; + reg = <0x50>; + }; + + rtc@68 { + compatible = "dallas,ds1339"; + reg = <0x68>; + interrupts = <0x5 0x1 0 0>; + }; + }; + + i2c@118100 { + }; + }; + + pci0: pcie@ffe240000 { + reg = <0xf 0xfe240000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe250000 { + reg = <0xf 0xfe250000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe260000 { + reg = <0xf 0xfe260000 0 0x10000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x10000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/t1023si-post.dtsi" diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index d7c1b4d4254a..bd839dc287fe 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -150,6 +150,7 @@ static const char * const boards[] __initconst = { "fsl,B4860QDS", "fsl,B4420QDS", "fsl,B4220QDS", + "fsl,T1023RDB", "fsl,T1024QDS", "fsl,T1024RDB", "fsl,T1040QDS", -- cgit v1.2.3 From 9b6179dc1e1a481026ce7b9222275727ec76aef4 Mon Sep 17 00:00:00 2001 From: Igal Liberman Date: Thu, 16 Apr 2015 14:53:36 +0300 Subject: powerpc/dts: Fix incorrect clock-names property Signed-off-by: Igal Liberman 
Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index 86bdaf6cbd14..c9ad929adaf8 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -417,7 +417,7 @@ compatible = "fsl,qoriq-core-mux-2.0"; clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll1-div4", + clock-names = "pll0", "pll0-div2", "pll0-div4", "pll1", "pll1-div2", "pll1-div4"; clock-output-names = "cmux0"; }; @@ -428,7 +428,7 @@ compatible = "fsl,qoriq-core-mux-2.0"; clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>, <&pll1 0>, <&pll1 1>, <&pll1 2>; - clock-names = "pll0", "pll0-div2", "pll1-div4", + clock-names = "pll0", "pll0-div2", "pll0-div4", "pll1", "pll1-div2", "pll1-div4"; clock-output-names = "cmux1"; }; -- cgit v1.2.3 From 86c3b16e9f330c94062c40302ab266f0d932057e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Fri, 17 Apr 2015 18:37:17 +0200 Subject: powerpc/8xx: mmu_virtual_psize incorrect for 16k pages mmu_virtual_psize shall be set to MMU_PAGE_16K when 16k pages have been selected Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/mmu-8xx.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 986b9e1e1044..d41200c01d85 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -145,7 +145,14 @@ typedef struct { } mm_context_t; #endif /* !__ASSEMBLY__ */ +#if (PAGE_SHIFT == 12) #define mmu_virtual_psize MMU_PAGE_4K +#elif (PAGE_SHIFT == 14) +#define mmu_virtual_psize MMU_PAGE_16K +#else +#error "Unsupported PAGE_SIZE" +#endif + #define mmu_linear_psize MMU_PAGE_8M #endif /* _ASM_POWERPC_MMU_8XX_H_ */ -- cgit v1.2.3 From 6c0cc62715bfd0b26f7d1a79e6e8143085950ca7 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 17 Apr 2015 16:17:14 -0500 Subject: powerpc/mm: Use PFN_PHYS() in devmem_is_allowed() This function can run on systems where physical addresses don't fit in unsigned long, so make sure to use the macro that contains the proper cast. 
Signed-off-by: Scott Wood --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 45fda71feb27..0f11819d8f1d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -560,7 +560,7 @@ subsys_initcall(add_system_ram_resources); */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; -- cgit v1.2.3 From 7f6972a0d018daacef3b31090057771bc657f18a Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 17 Apr 2015 17:53:07 -0500 Subject: powerpc/mpc85xx: Add FSL QorIQ DPAA QMan support to device tree(s) Signed-off-by: Kumar Gala Signed-off-by: Geoff Thorpe Signed-off-by: Hai-Ying Wang Signed-off-by: Chunhe Lan Signed-off-by: Poonam Aggrwal [Emil Medve: Sync with the upstream binding] Signed-off-by: Emil Medve [Scott Wood: s/fsl,qman-channel-id/cell-index] Signed-off-by: Scott Wood Cc: Madalin-Cristian Bucur --- arch/powerpc/boot/dts/b4qds.dtsi | 12 + arch/powerpc/boot/dts/fsl/b4860si-post.dtsi | 69 +++++ arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 106 +++++++ arch/powerpc/boot/dts/fsl/p1023si-post.dtsi | 43 +++ arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p3041si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p4080si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p5020si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/p5040si-post.dtsi | 13 + arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi | 20 +- arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 78 +++++ arch/powerpc/boot/dts/fsl/t2081si-post.dtsi | 126 ++++++++ arch/powerpc/boot/dts/fsl/t4240si-post.dtsi | 318 +++++++++++++++++++++ arch/powerpc/boot/dts/kmcoge4.dts | 12 + arch/powerpc/boot/dts/oca4080.dts | 12 + arch/powerpc/boot/dts/p1023rdb.dts | 12 + arch/powerpc/boot/dts/p2041rdb.dts | 12 + arch/powerpc/boot/dts/p3041ds.dts | 12 + arch/powerpc/boot/dts/p4080ds.dts | 12 + arch/powerpc/boot/dts/p5020ds.dts | 12 + arch/powerpc/boot/dts/p5040ds.dts | 12 + arch/powerpc/boot/dts/t104xqds.dtsi | 12 + arch/powerpc/boot/dts/t104xrdb.dtsi | 12 + arch/powerpc/boot/dts/t208xqds.dtsi | 12 + arch/powerpc/boot/dts/t208xrdb.dtsi | 12 + arch/powerpc/boot/dts/t4240qds.dts | 12 + arch/powerpc/boot/dts/t4240rdb.dts | 12 + 27 files changed, 995 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/b4qds.dtsi index 24ed80dc2120..559d00657fb5 100644 --- a/arch/powerpc/boot/dts/b4qds.dtsi +++ b/arch/powerpc/boot/dts/b4qds.dtsi @@ -106,6 +106,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -116,6 +124,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi index 68b9a0536485..9ba904be39ee 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi @@ -167,6 +167,75 @@ }; }; +&qportals { + qportal14: qman-portal@38000 { + compatible = "fsl,qman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts 
= <132 0x2 0 0>; + cell-index = <0xe>; + }; + qportal15: qman-portal@3c000 { + compatible = "fsl,qman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <134 0x2 0 0>; + cell-index = <0xf>; + }; + qportal16: qman-portal@40000 { + compatible = "fsl,qman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <136 0x2 0 0>; + cell-index = <0x10>; + }; + qportal17: qman-portal@44000 { + compatible = "fsl,qman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <138 0x2 0 0>; + cell-index = <0x11>; + }; + qportal18: qman-portal@48000 { + compatible = "fsl,qman-portal"; + reg = <0x48000 0x4000>, <0x1012000 0x1000>; + interrupts = <140 0x2 0 0>; + cell-index = <0x12>; + }; + qportal19: qman-portal@4c000 { + compatible = "fsl,qman-portal"; + reg = <0x4c000 0x4000>, <0x1013000 0x1000>; + interrupts = <142 0x2 0 0>; + cell-index = <0x13>; + }; + qportal20: qman-portal@50000 { + compatible = "fsl,qman-portal"; + reg = <0x50000 0x4000>, <0x1014000 0x1000>; + interrupts = <144 0x2 0 0>; + cell-index = <0x14>; + }; + qportal21: qman-portal@54000 { + compatible = "fsl,qman-portal"; + reg = <0x54000 0x4000>, <0x1015000 0x1000>; + interrupts = <146 0x2 0 0>; + cell-index = <0x15>; + }; + qportal22: qman-portal@58000 { + compatible = "fsl,qman-portal"; + reg = <0x58000 0x4000>, <0x1016000 0x1000>; + interrupts = <148 0x2 0 0>; + cell-index = <0x16>; + }; + qportal23: qman-portal@5c000 { + compatible = "fsl,qman-portal"; + reg = <0x5c000 0x4000>, <0x1017000 0x1000>; + interrupts = <150 0x2 0 0>; + cell-index = <0x17>; + }; + qportal24: qman-portal@60000 { + compatible = "fsl,qman-portal"; + reg = <0x60000 0x4000>, <0x1018000 0x1000>; + interrupts = <152 0x2 0 0>; + cell-index = <0x18>; + }; +}; + &soc { ddr2: memory-controller@9000 { compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller"; diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi index c6bab55bb065..603910ac1db0 100644 --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -210,6 +220,97 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = 
"fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; + qportal10: qman-portal@28000 { + compatible = "fsl,qman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <124 0x2 0 0>; + cell-index = <0xa>; + }; + qportal11: qman-portal@2c000 { + compatible = "fsl,qman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <126 0x2 0 0>; + cell-index = <0xb>; + }; + qportal12: qman-portal@30000 { + compatible = "fsl,qman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <128 0x2 0 0>; + cell-index = <0xc>; + }; + qportal13: qman-portal@34000 { + compatible = "fsl,qman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <130 0x2 0 0>; + cell-index = <0xd>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -355,6 +456,11 @@ /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-sec5.3-0.dtsi" +/include/ "qoriq-qman3.dtsi" + qman: qman@318000 { + interrupts = <16 2 1 28>; + }; + /include/ "qoriq-bman1.dtsi" bman: bman@31a000 { interrupts = <16 2 1 29>; diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi index 7780f21430cb..da6d3fc6ba41 100644 --- a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { #address-cells = <2>; #size-cells = <1>; @@ -102,6 +112,31 @@ }; }; +&qportals { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x100000 0x1000>; + interrupts = <29 2 0 0>; + cell-index = <0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <31 2 0 0>; + cell-index = <1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x102000 0x1000>; + interrupts = <33 2 0 0>; + cell-index = <2>; + }; +}; + &bportals { #address-cells = <1>; #size-cells = <1>; @@ -248,6 +283,14 @@ /include/ "pq3-mpic.dtsi" /include/ "pq3-mpic-timer-B.dtsi" + qman: qman@88000 { + compatible = "fsl,qman"; + reg = <0x88000 0x1000>; + interrupts = <16 2 0 0>; + fsl,qman-portals = <&qportals>; + memory-region = <&qman_fqd &qman_pfdr>; + }; + bman: bman@8a000 { compatible = "fsl,bman"; reg = <0x8a000 0x1000>; diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index f2feacfd9a25..1f18b8bac1d2 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; 
+}; + &lbc { compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -223,6 +233,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -415,5 +427,6 @@ crypto: crypto@300000 { fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi index d6fea37395ad..a555d2491197 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -250,6 +260,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -442,5 +454,6 @@ crypto: crypto@300000 { fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 89482c9b2301..0fe728113197 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10 0>; +}; + &lbc { compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -250,6 +260,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -498,5 +510,6 @@ crypto: crypto@300000 { fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi index 6e04851e2fc9..a34ca200d8fb 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &lbc { compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -247,6 +257,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { #address-cells = <1>; #size-cells = <1>; @@ -428,6 +440,7 @@ fsl,iommu-parent = <&pamu1>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" /include/ "qoriq-raid1.0-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi index 5e44dfa1e1a5..bf575132c309 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &lbc { compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus"; interrupts = <25 2 0 0>; @@ -202,6 +212,8 @@ /include/ "qoriq-bman1-portals.dtsi" +/include/ "qoriq-qman1-portals.dtsi" + &soc { 
#address-cells = <1>; #size-cells = <1>; @@ -407,5 +419,6 @@ fsl,iommu-parent = <&pamu4>; }; +/include/ "qoriq-qman1.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi index 05d51acafa67..e77e4b4ed53b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi @@ -41,61 +41,61 @@ compatible = "fsl,qman-portal"; reg = <0x0 0x4000>, <0x100000 0x1000>; interrupts = <104 2 0 0>; - fsl,qman-channel-id = <0x0>; + cell-index = <0x0>; }; qportal1: qman-portal@4000 { compatible = "fsl,qman-portal"; reg = <0x4000 0x4000>, <0x101000 0x1000>; interrupts = <106 2 0 0>; - fsl,qman-channel-id = <1>; + cell-index = <1>; }; qportal2: qman-portal@8000 { compatible = "fsl,qman-portal"; reg = <0x8000 0x4000>, <0x102000 0x1000>; interrupts = <108 2 0 0>; - fsl,qman-channel-id = <2>; + cell-index = <2>; }; qportal3: qman-portal@c000 { compatible = "fsl,qman-portal"; reg = <0xc000 0x4000>, <0x103000 0x1000>; interrupts = <110 2 0 0>; - fsl,qman-channel-id = <3>; + cell-index = <3>; }; qportal4: qman-portal@10000 { compatible = "fsl,qman-portal"; reg = <0x10000 0x4000>, <0x104000 0x1000>; interrupts = <112 2 0 0>; - fsl,qman-channel-id = <4>; + cell-index = <4>; }; qportal5: qman-portal@14000 { compatible = "fsl,qman-portal"; reg = <0x14000 0x4000>, <0x105000 0x1000>; interrupts = <114 2 0 0>; - fsl,qman-channel-id = <5>; + cell-index = <5>; }; qportal6: qman-portal@18000 { compatible = "fsl,qman-portal"; reg = <0x18000 0x4000>, <0x106000 0x1000>; interrupts = <116 2 0 0>; - fsl,qman-channel-id = <6>; + cell-index = <6>; }; qportal7: qman-portal@1c000 { compatible = "fsl,qman-portal"; reg = <0x1c000 0x4000>, <0x107000 0x1000>; interrupts = <118 2 0 0>; - fsl,qman-channel-id = <7>; + cell-index = <7>; }; qportal8: qman-portal@20000 { compatible = "fsl,qman-portal"; reg = <0x20000 0x4000>, <0x108000 0x1000>; interrupts = <120 2 0 0>; - fsl,qman-channel-id = <8>; + cell-index = <8>; }; qportal9: qman-portal@24000 { compatible = "fsl,qman-portal"; reg = <0x24000 0x4000>, <0x109000 0x1000>; interrupts = <122 2 0 0>; - fsl,qman-channel-id = <9>; + cell-index = <9>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 5cc01be5b152..9e9f7e201d43 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -280,6 +290,73 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: 
qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -463,5 +540,6 @@ fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */ }; /include/ "qoriq-sec5.0-0.dtsi" +/include/ "qoriq-qman3.dtsi" /include/ "qoriq-bman1.dtsi" }; diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index c9ad929adaf8..32c790ae7fde 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -326,6 +336,121 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; + qportal10: 
qman-portal@28000 { + compatible = "fsl,qman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <124 0x2 0 0>; + cell-index = <0xa>; + }; + qportal11: qman-portal@2c000 { + compatible = "fsl,qman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <126 0x2 0 0>; + cell-index = <0xb>; + }; + qportal12: qman-portal@30000 { + compatible = "fsl,qman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <128 0x2 0 0>; + cell-index = <0xc>; + }; + qportal13: qman-portal@34000 { + compatible = "fsl,qman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <130 0x2 0 0>; + cell-index = <0xd>; + }; + qportal14: qman-portal@38000 { + compatible = "fsl,qman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts = <132 0x2 0 0>; + cell-index = <0xe>; + }; + qportal15: qman-portal@3c000 { + compatible = "fsl,qman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <134 0x2 0 0>; + cell-index = <0xf>; + }; + qportal16: qman-portal@40000 { + compatible = "fsl,qman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <136 0x2 0 0>; + cell-index = <0x10>; + }; + qportal17: qman-portal@44000 { + compatible = "fsl,qman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <138 0x2 0 0>; + cell-index = <0x11>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -502,6 +627,7 @@ phy_type = "utmi"; }; /include/ "qoriq-sec5.2-0.dtsi" +/include/ "qoriq-qman3.dtsi" /include/ "qoriq-bman1.dtsi" L2_1: l2-cache-controller@c20000 { diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi index 4d4f25895d8c..d806360d0f64 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi @@ -37,6 +37,16 @@ alloc-ranges = <0 0 0x10000 0>; }; +&qman_fqd { + compatible = "fsl,qman-fqd"; + alloc-ranges = <0 0 0x10000 0>; +}; + +&qman_pfdr { + compatible = "fsl,qman-pfdr"; + alloc-ranges = <0 0 0x10000 0>; +}; + &ifc { #address-cells = <2>; #size-cells = <1>; @@ -556,6 +566,313 @@ }; }; +&qportals { + #address-cells = <0x1>; + #size-cells = <0x1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x1000000 0x1000>; + interrupts = <104 0x2 0 0>; + cell-index = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x1001000 0x1000>; + interrupts = <106 0x2 0 0>; + cell-index = <0x1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x1002000 0x1000>; + interrupts = <108 0x2 0 0>; + cell-index = <0x2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x1003000 0x1000>; + interrupts = <110 0x2 0 0>; + cell-index = <0x3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x1004000 0x1000>; + interrupts = <112 0x2 0 0>; + cell-index = <0x4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x1005000 0x1000>; + interrupts = <114 0x2 0 0>; + cell-index = <0x5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x1006000 0x1000>; + interrupts = <116 0x2 0 0>; + cell-index = <0x6>; + }; + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x1007000 0x1000>; + interrupts = <118 0x2 0 0>; + cell-index = <0x7>; + }; + 
qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x1008000 0x1000>; + interrupts = <120 0x2 0 0>; + cell-index = <0x8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x1009000 0x1000>; + interrupts = <122 0x2 0 0>; + cell-index = <0x9>; + }; + qportal10: qman-portal@28000 { + compatible = "fsl,qman-portal"; + reg = <0x28000 0x4000>, <0x100a000 0x1000>; + interrupts = <124 0x2 0 0>; + cell-index = <0xa>; + }; + qportal11: qman-portal@2c000 { + compatible = "fsl,qman-portal"; + reg = <0x2c000 0x4000>, <0x100b000 0x1000>; + interrupts = <126 0x2 0 0>; + cell-index = <0xb>; + }; + qportal12: qman-portal@30000 { + compatible = "fsl,qman-portal"; + reg = <0x30000 0x4000>, <0x100c000 0x1000>; + interrupts = <128 0x2 0 0>; + cell-index = <0xc>; + }; + qportal13: qman-portal@34000 { + compatible = "fsl,qman-portal"; + reg = <0x34000 0x4000>, <0x100d000 0x1000>; + interrupts = <130 0x2 0 0>; + cell-index = <0xd>; + }; + qportal14: qman-portal@38000 { + compatible = "fsl,qman-portal"; + reg = <0x38000 0x4000>, <0x100e000 0x1000>; + interrupts = <132 0x2 0 0>; + cell-index = <0xe>; + }; + qportal15: qman-portal@3c000 { + compatible = "fsl,qman-portal"; + reg = <0x3c000 0x4000>, <0x100f000 0x1000>; + interrupts = <134 0x2 0 0>; + cell-index = <0xf>; + }; + qportal16: qman-portal@40000 { + compatible = "fsl,qman-portal"; + reg = <0x40000 0x4000>, <0x1010000 0x1000>; + interrupts = <136 0x2 0 0>; + cell-index = <0x10>; + }; + qportal17: qman-portal@44000 { + compatible = "fsl,qman-portal"; + reg = <0x44000 0x4000>, <0x1011000 0x1000>; + interrupts = <138 0x2 0 0>; + cell-index = <0x11>; + }; + qportal18: qman-portal@48000 { + compatible = "fsl,qman-portal"; + reg = <0x48000 0x4000>, <0x1012000 0x1000>; + interrupts = <140 0x2 0 0>; + cell-index = <0x12>; + }; + qportal19: qman-portal@4c000 { + compatible = "fsl,qman-portal"; + reg = <0x4c000 0x4000>, <0x1013000 0x1000>; + interrupts = <142 0x2 0 0>; + cell-index = <0x13>; + }; + qportal20: qman-portal@50000 { + compatible = "fsl,qman-portal"; + reg = <0x50000 0x4000>, <0x1014000 0x1000>; + interrupts = <144 0x2 0 0>; + cell-index = <0x14>; + }; + qportal21: qman-portal@54000 { + compatible = "fsl,qman-portal"; + reg = <0x54000 0x4000>, <0x1015000 0x1000>; + interrupts = <146 0x2 0 0>; + cell-index = <0x15>; + }; + qportal22: qman-portal@58000 { + compatible = "fsl,qman-portal"; + reg = <0x58000 0x4000>, <0x1016000 0x1000>; + interrupts = <148 0x2 0 0>; + cell-index = <0x16>; + }; + qportal23: qman-portal@5c000 { + compatible = "fsl,qman-portal"; + reg = <0x5c000 0x4000>, <0x1017000 0x1000>; + interrupts = <150 0x2 0 0>; + cell-index = <0x17>; + }; + qportal24: qman-portal@60000 { + compatible = "fsl,qman-portal"; + reg = <0x60000 0x4000>, <0x1018000 0x1000>; + interrupts = <152 0x2 0 0>; + cell-index = <0x18>; + }; + qportal25: qman-portal@64000 { + compatible = "fsl,qman-portal"; + reg = <0x64000 0x4000>, <0x1019000 0x1000>; + interrupts = <154 0x2 0 0>; + cell-index = <0x19>; + }; + qportal26: qman-portal@68000 { + compatible = "fsl,qman-portal"; + reg = <0x68000 0x4000>, <0x101a000 0x1000>; + interrupts = <156 0x2 0 0>; + cell-index = <0x1a>; + }; + qportal27: qman-portal@6c000 { + compatible = "fsl,qman-portal"; + reg = <0x6c000 0x4000>, <0x101b000 0x1000>; + interrupts = <158 0x2 0 0>; + cell-index = <0x1b>; + }; + qportal28: qman-portal@70000 { + compatible = "fsl,qman-portal"; + reg = <0x70000 0x4000>, <0x101c000 0x1000>; + interrupts = <160 0x2 0 0>; + 
cell-index = <0x1c>; + }; + qportal29: qman-portal@74000 { + compatible = "fsl,qman-portal"; + reg = <0x74000 0x4000>, <0x101d000 0x1000>; + interrupts = <162 0x2 0 0>; + cell-index = <0x1d>; + }; + qportal30: qman-portal@78000 { + compatible = "fsl,qman-portal"; + reg = <0x78000 0x4000>, <0x101e000 0x1000>; + interrupts = <164 0x2 0 0>; + cell-index = <0x1e>; + }; + qportal31: qman-portal@7c000 { + compatible = "fsl,qman-portal"; + reg = <0x7c000 0x4000>, <0x101f000 0x1000>; + interrupts = <166 0x2 0 0>; + cell-index = <0x1f>; + }; + qportal32: qman-portal@80000 { + compatible = "fsl,qman-portal"; + reg = <0x80000 0x4000>, <0x1020000 0x1000>; + interrupts = <168 0x2 0 0>; + cell-index = <0x20>; + }; + qportal33: qman-portal@84000 { + compatible = "fsl,qman-portal"; + reg = <0x84000 0x4000>, <0x1021000 0x1000>; + interrupts = <170 0x2 0 0>; + cell-index = <0x21>; + }; + qportal34: qman-portal@88000 { + compatible = "fsl,qman-portal"; + reg = <0x88000 0x4000>, <0x1022000 0x1000>; + interrupts = <172 0x2 0 0>; + cell-index = <0x22>; + }; + qportal35: qman-portal@8c000 { + compatible = "fsl,qman-portal"; + reg = <0x8c000 0x4000>, <0x1023000 0x1000>; + interrupts = <174 0x2 0 0>; + cell-index = <0x23>; + }; + qportal36: qman-portal@90000 { + compatible = "fsl,qman-portal"; + reg = <0x90000 0x4000>, <0x1024000 0x1000>; + interrupts = <384 0x2 0 0>; + cell-index = <0x24>; + }; + qportal37: qman-portal@94000 { + compatible = "fsl,qman-portal"; + reg = <0x94000 0x4000>, <0x1025000 0x1000>; + interrupts = <386 0x2 0 0>; + cell-index = <0x25>; + }; + qportal38: qman-portal@98000 { + compatible = "fsl,qman-portal"; + reg = <0x98000 0x4000>, <0x1026000 0x1000>; + interrupts = <388 0x2 0 0>; + cell-index = <0x26>; + }; + qportal39: qman-portal@9c000 { + compatible = "fsl,qman-portal"; + reg = <0x9c000 0x4000>, <0x1027000 0x1000>; + interrupts = <390 0x2 0 0>; + cell-index = <0x27>; + }; + qportal40: qman-portal@a0000 { + compatible = "fsl,qman-portal"; + reg = <0xa0000 0x4000>, <0x1028000 0x1000>; + interrupts = <392 0x2 0 0>; + cell-index = <0x28>; + }; + qportal41: qman-portal@a4000 { + compatible = "fsl,qman-portal"; + reg = <0xa4000 0x4000>, <0x1029000 0x1000>; + interrupts = <394 0x2 0 0>; + cell-index = <0x29>; + }; + qportal42: qman-portal@a8000 { + compatible = "fsl,qman-portal"; + reg = <0xa8000 0x4000>, <0x102a000 0x1000>; + interrupts = <396 0x2 0 0>; + cell-index = <0x2a>; + }; + qportal43: qman-portal@ac000 { + compatible = "fsl,qman-portal"; + reg = <0xac000 0x4000>, <0x102b000 0x1000>; + interrupts = <398 0x2 0 0>; + cell-index = <0x2b>; + }; + qportal44: qman-portal@b0000 { + compatible = "fsl,qman-portal"; + reg = <0xb0000 0x4000>, <0x102c000 0x1000>; + interrupts = <400 0x2 0 0>; + cell-index = <0x2c>; + }; + qportal45: qman-portal@b4000 { + compatible = "fsl,qman-portal"; + reg = <0xb4000 0x4000>, <0x102d000 0x1000>; + interrupts = <402 0x2 0 0>; + cell-index = <0x2d>; + }; + qportal46: qman-portal@b8000 { + compatible = "fsl,qman-portal"; + reg = <0xb8000 0x4000>, <0x102e000 0x1000>; + interrupts = <404 0x2 0 0>; + cell-index = <0x2e>; + }; + qportal47: qman-portal@bc000 { + compatible = "fsl,qman-portal"; + reg = <0xbc000 0x4000>, <0x102f000 0x1000>; + interrupts = <406 0x2 0 0>; + cell-index = <0x2f>; + }; + qportal48: qman-portal@c0000 { + compatible = "fsl,qman-portal"; + reg = <0xc0000 0x4000>, <0x1030000 0x1000>; + interrupts = <408 0x2 0 0>; + cell-index = <0x30>; + }; + qportal49: qman-portal@c4000 { + compatible = "fsl,qman-portal"; + reg = <0xc4000 0x4000>, <0x1031000 
0x1000>; + interrupts = <410 0x2 0 0>; + cell-index = <0x31>; + }; +}; + &soc { #address-cells = <1>; #size-cells = <1>; @@ -748,6 +1065,7 @@ /include/ "qoriq-sata2-0.dtsi" /include/ "qoriq-sata2-1.dtsi" /include/ "qoriq-sec5.0-0.dtsi" +/include/ "qoriq-qman3.dtsi" /include/ "qoriq-bman1.dtsi" L2_1: l2-cache-controller@c20000 { diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/kmcoge4.dts index 97e6d11d1e6d..48dab6a50437 100644 --- a/arch/powerpc/boot/dts/kmcoge4.dts +++ b/arch/powerpc/boot/dts/kmcoge4.dts @@ -34,6 +34,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -44,6 +52,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/oca4080.dts b/arch/powerpc/boot/dts/oca4080.dts index eb76caae11d9..42796c5b0561 100644 --- a/arch/powerpc/boot/dts/oca4080.dts +++ b/arch/powerpc/boot/dts/oca4080.dts @@ -58,6 +58,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -68,6 +76,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p1023rdb.dts b/arch/powerpc/boot/dts/p1023rdb.dts index 9236e3742a23..05a00a4d2861 100644 --- a/arch/powerpc/boot/dts/p1023rdb.dts +++ b/arch/powerpc/boot/dts/p1023rdb.dts @@ -56,6 +56,18 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; + }; + + qportals: qman-portals@ff000000 { + ranges = <0x0 0xf 0xff000000 0x200000>; }; bportals: bman-portals@ff200000 { diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/p2041rdb.dts index c1e69dc7188e..d2bb0765bd5a 100644 --- a/arch/powerpc/boot/dts/p2041rdb.dts +++ b/arch/powerpc/boot/dts/p2041rdb.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts index 2192fe94866d..eca6c697cfd7 100644 --- a/arch/powerpc/boot/dts/p3041ds.dts +++ b/arch/powerpc/boot/dts/p3041ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 
0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index fad441654642..4f80c9d02c27 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts index 7382636dc560..d0309a8b9749 100644 --- a/arch/powerpc/boot/dts/p5020ds.dts +++ b/arch/powerpc/boot/dts/p5020ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index 35dabf5b6098..05168236d3ab 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -54,6 +54,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -64,6 +72,10 @@ ranges = <0x0 0xf 0xf4000000 0x200000>; }; + qportals: qman-portals@ff4200000 { + ranges = <0x0 0xf 0xf4200000 0x200000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t104xqds.dtsi b/arch/powerpc/boot/dts/t104xqds.dtsi index f7e9bfbeefc7..1498d1e4aecf 100644 --- a/arch/powerpc/boot/dts/t104xqds.dtsi +++ b/arch/powerpc/boot/dts/t104xqds.dtsi @@ -47,6 +47,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -92,6 +100,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t104xrdb.dtsi b/arch/powerpc/boot/dts/t104xrdb.dtsi index 76e07a3f2ca8..830ea484295b 100644 --- a/arch/powerpc/boot/dts/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/t104xrdb.dtsi @@ -42,6 +42,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 
{ @@ -83,6 +91,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/t208xqds.dtsi index c42e07f4f648..869f9159b4d1 100644 --- a/arch/powerpc/boot/dts/t208xqds.dtsi +++ b/arch/powerpc/boot/dts/t208xqds.dtsi @@ -48,6 +48,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -93,6 +101,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/t208xrdb.dtsi index e1463b165d0e..693d2a8fa01c 100644 --- a/arch/powerpc/boot/dts/t208xrdb.dtsi +++ b/arch/powerpc/boot/dts/t208xrdb.dtsi @@ -48,6 +48,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; ifc: localbus@ffe124000 { @@ -94,6 +102,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t4240qds.dts b/arch/powerpc/boot/dts/t4240qds.dts index 6df77766410b..93722da10e16 100644 --- a/arch/powerpc/boot/dts/t4240qds.dts +++ b/arch/powerpc/boot/dts/t4240qds.dts @@ -109,6 +109,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -119,6 +127,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/t4240rdb.dts index 46049cf37f02..993eb4b8a487 100644 --- a/arch/powerpc/boot/dts/t4240rdb.dts +++ b/arch/powerpc/boot/dts/t4240rdb.dts @@ -78,6 +78,14 @@ size = <0 0x1000000>; alignment = <0 0x1000000>; }; + qman_fqd: qman-fqd { + size = <0 0x400000>; + alignment = <0 0x400000>; + }; + qman_pfdr: qman-pfdr { + size = <0 0x2000000>; + alignment = <0 0x2000000>; + }; }; dcsr: dcsr@f00000000 { @@ -88,6 +96,10 @@ ranges = <0x0 0xf 0xf4000000 0x2000000>; }; + qportals: qman-portals@ff6000000 { + ranges = <0x0 0xf 0xf6000000 0x2000000>; + }; + soc: soc@ffe000000 { ranges = <0x00000000 0xf 0xfe000000 0x1000000>; reg = <0xf 0xfe000000 0 0x00001000>; -- cgit v1.2.3 From 90883a8255148e98026591e52a3495db18b29905 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:38 +0200 Subject: powerpc/8xx: macro for handling CPU15 errata Having a macro will help keep clear code. 
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9b53fe139bf6..1279018b3749 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,6 +297,17 @@ SystemCall: * We have to use the MD_xxx registers for the tablewalk because the * equivalent MI_xxx registers only perform the attribute functions. */ + +#ifdef CONFIG_8xx_CPU15 +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \ + addi tmp, addr, PAGE_SIZE; \ + tlbie tmp; \ + addi tmp, addr, -PAGE_SIZE; \ + tlbie tmp +#else +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) +#endif + InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 mtspr SPRN_DAR, r3 @@ -304,12 +315,7 @@ InstructionTLBMiss: EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ -#ifdef CONFIG_8xx_CPU15 - addi r11, r10, PAGE_SIZE - tlbie r11 - addi r11, r10, -PAGE_SIZE - tlbie r11 -#endif + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. -- cgit v1.2.3 From d5fd9d7d669e81e024737a198d12ff8af4fd89cf Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:40 +0200 Subject: powerpc/8xx: Handle CR out of exception PROLOG/EPILOG In order to be able to reduce scope during which CR is saved, we take CR saving/restoring out of exception PROLOG and EPILOG Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1279018b3749..5a69c5e82363 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -116,13 +116,13 @@ turn_on_mmu: */ #define EXCEPTION_PROLOG \ EXCEPTION_PROLOG_0; \ + mfcr r10; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 #define EXCEPTION_PROLOG_0 \ mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10 + mtspr SPRN_SPRG_SCRATCH1,r11 #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ @@ -162,7 +162,6 @@ turn_on_mmu: * Exception exit code. */ #define EXCEPTION_EPILOG_0 \ - mtcr r10; \ mfspr r10,SPRN_SPRG_SCRATCH0; \ mfspr r11,SPRN_SPRG_SCRATCH1 @@ -313,6 +312,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcr r10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) @@ -363,6 +363,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 + mtcr r10 EXCEPTION_EPILOG_0 rfi @@ -372,6 +373,7 @@ DataStoreTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcr r10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_MD_EPN @@ -437,6 +439,7 @@ DataStoreTLBMiss: #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 + mtcr r10 EXCEPTION_EPILOG_0 rfi @@ -462,6 +465,7 @@ InstructionTLBError: . 
= 0x1400 DataTLBError: EXCEPTION_PROLOG_0 + mfcr r10 mfspr r11, SPRN_DAR cmpwi cr0, r11, RPN_PATTERN -- cgit v1.2.3 From 2eb2fd95001e93c611034f494c7254350ff94d2a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:42 +0200 Subject: powerpc/8xx: dont save CR in SCRATCH registers CR only needs to be preserved when checking if we are handling a kernel address. So we can preserve CR in a register: - In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we don't need to do anything at all with CR. - We use r10, then we reload SRR0/MD_EPN into r10 when CR is restored Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5a69c5e82363..150d03f79ad8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -312,10 +312,6 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 - mfcr r10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -323,13 +319,20 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ -#endif + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) + mfcr r10 + andis. r11, r11, 0x8000 /* Address >= 0x80000000 */ mfspr r11, SPRN_M_TW /* Get level 1 table */ -#ifdef CONFIG_MODULES beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcr r10 + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ +#else + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -362,8 +365,6 @@ InstructionTLBMiss: mfspr r3, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ #endif - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcr r10 EXCEPTION_EPILOG_0 rfi @@ -374,17 +375,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mfcr r10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 + mfspr r11, SPRN_MD_EPN + andis. r11, r11, 0x8000 mfspr r11, SPRN_M_TW /* Get level 1 table */ beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcr r10 + mfspr r10, SPRN_MD_EPN + /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ @@ -438,8 +441,6 @@ DataStoreTLBMiss: mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcr r10 EXCEPTION_EPILOG_0 rfi -- cgit v1.2.3 From b821c5fe84829996e179bba5da30ee33fb8e9f9f Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:44 +0200 Subject: powerpc/8xx: Use SPRG2 instead of DAR for saving r3 We now have SPRG2 available as in it not used anymore for saving CR, so we don't need to crash DAR anymore for saving r3 for CPU6 ERRATA handling. 
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 150d03f79ad8..ba2dc53cd5a1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -309,7 +309,7 @@ SystemCall: InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -362,8 +362,7 @@ InstructionTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi @@ -371,7 +370,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 mfcr r10 @@ -438,7 +437,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR + mfspr r3, SPRN_SPRG_SCRATCH2 #endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi -- cgit v1.2.3 From eeba1f7c38425cfc0b70ba133ab7e523d5b6d90e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 20 Apr 2015 07:54:46 +0200 Subject: powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000 By default, TASK_SIZE is set to 0x80000000 for PPC_8xx, which is most likely sufficient for most cases. However, the kernel configuration allows TASK_SIZE to be set to another value, so the 8xx shall handle it. This patch also takes into account the case of PAGE_OFFSET lower than 0x80000000, although most of the time it is equal to 0xC0000000.
r11, r10, 0x8000 /* Address >= 0x80000000 */ + IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 -- cgit v1.2.3 From 83b086c5697169f1e34d2430dad062cc714c5c57 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Apr 2015 12:06:41 +0200 Subject: powerpc/8xx: mark _PAGE_SHARED all types of kernel pages All kernel pages have to be marked as shared in order to not perform CASID verification. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pte-8xx.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 97bae64afdaa..8ea537e76058 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -62,7 +62,12 @@ /* We need to add _PAGE_SHARED to kernel pages */ #define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \ + _PAGE_EXEC) +#define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE) +#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ + _PAGE_HWWRITE | _PAGE_EXEC) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PTE_8xx_H */ -- cgit v1.2.3 From e0a8e0d90a9f0be66ba49f4f2380a63c22d4aaae Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Apr 2015 12:06:43 +0200 Subject: powerpc/8xx: Handle PAGE_USER via APG bits Use of APG for handling PAGE_USER. All pages PP exec bits are set to either 000 or 011, which means respectively RW for Supervisor and no access for User, or RO for Supervisor and no access for user. Then we use the APG to say whether accesses are according to Page rules or "all Supervisor" rules (Access to all) Therefore, we define 2 APG groups corresponding to _PAGE_USER. Mx_AP are initialised as follows: GP0 => No user => 01 (all accesses performed according to page definition) GP1 => User => 00 (all accesses performed as supervisor according to page definition) This removes the special 8xx handling in pte_update() Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pgtable-ppc32.h | 19 ------------------- arch/powerpc/include/asm/pte-8xx.h | 27 +++++++++------------------ arch/powerpc/kernel/head_8xx.S | 21 ++++++++++++--------- 3 files changed, 21 insertions(+), 46 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 64b52b1cf542..9c326565d498 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -170,24 +170,6 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; -#ifdef CONFIG_PPC_8xx - unsigned long tmp2; - - __asm__ __volatile__("\ -1: lwarx %0,0,%4\n\ - andc %1,%0,%5\n\ - or %1,%1,%6\n\ - /* 0x200 == Extended encoding, bit 22 */ \ - /* Bit 22 has to be 1 when _PAGE_USER is unset and _PAGE_RO is set */ \ - rlwimi %1,%1,32-1,0x200\n /* get _PAGE_RO */ \ - rlwinm %3,%1,32-2,0x200\n /* get _PAGE_USER */ \ - andc %1,%1,%3\n\ - stwcx. 
%1,0,%4\n\ - bne- 1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc %1,%0,%4\n\ @@ -198,7 +180,6 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); -#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 8ea537e76058..b82094e4c242 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -34,36 +34,27 @@ #define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ #define _PAGE_DIRTY 0x0100 /* C: page changed */ -/* These 4 software bits must be masked out when the entry is loaded - * into the TLB, 1 SW bit left(0x0080). +/* These 4 software bits must be masked out when the L2 entry is loaded + * into the TLB. */ -#define _PAGE_GUARDED 0x0010 /* software: guarded access */ -#define _PAGE_ACCESSED 0x0020 /* software: page referenced */ -#define _PAGE_WRITETHRU 0x0040 /* software: caching is write through */ +#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ +#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ +#define _PAGE_ACCESSED 0x0040 /* software: page referenced */ +#define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */ -/* Setting any bits in the nibble with the follow two controls will - * require a TLB exception handler change. It is assumed unused bits - * are always zero. - */ -#define _PAGE_RO 0x0400 /* lsb PP bits */ -#define _PAGE_USER 0x0800 /* msb PP bits */ -/* set when _PAGE_USER is unset and _PAGE_RO is set */ -#define _PAGE_KNLRO 0x0200 +#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_KNLRO - /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \ - _PAGE_EXEC) +#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC) #define _PAGE_KERNEL_RW (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ _PAGE_HWWRITE) #define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c640bbb042b5..c79184d86f58 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -351,12 +351,15 @@ InstructionTLBMiss: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - /* Load the MI_TWC with the attributes for this "segment." */ - MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ - rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 - lwzx r10, r10, r11 /* Get the pte */ + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ + lwz r10, 0(r10) /* Get the pte */ + + /* Insert the APG into the TWC from the Linux PTE. 
*/ + rlwimi r11, r10, 0, 26, 26 + /* Load the MI_TWC with the attributes for this "segment." */ + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT @@ -365,12 +368,12 @@ InstructionTLBMiss: #endif li r11, RPN_PATTERN /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21 and 28 must be clear. + * Software indicator bits 20-23 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ + rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ @@ -411,13 +414,13 @@ DataStoreTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - /* Insert the Guarded flag into the TWC from the Linux PTE. - * It is bit 27 of both the Linux PTE and the TWC (at least + /* Insert the Guarded flag and APG into the TWC from the Linux PTE. + * It is bit 26-27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, 27, 27 + rlwimi r11, r10, 0, 26, 27 /* Insert the WriteThru flag into the TWC from the Linux PTE. * It is bit 25 in the Linux PTE and bit 30 in the TWC */ -- cgit v1.2.3 From 5b2753fc3e8a72253310d01a8b0a5bb05d917ef8 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 22 Apr 2015 12:06:45 +0200 Subject: powerpc/8xx: Implementation of PAGE_EXEC This patch implements PAGE_EXEC capability on the 8xx. All pages PP exec bits are set to 000, which means Execute for Supervisor and no Execute for User. Then we use the APG to say whether accesses are according to Page rules, "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER. 
MI_AP is initialised as follows: GP0 (00) => Not User, no exec => 11 (all accesses performed as user) GP1 (01) => User but no exec => 11 (all accesses performed as user) GP2 (10) => Not User, exec => 01 (rights according to page definition) GP3 (11) => User, exec => 00 (all accesses performed as supervisor) Signed-off-by: Christophe Leroy [scottwood: comments: s/exec/data/ on data side, and s/pages/pages'/] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputable.h | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 26 ++++++++++++++++++++++++++ arch/powerpc/include/asm/pte-8xx.h | 3 ++- arch/powerpc/kernel/head_8xx.S | 12 +++++++++--- 4 files changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 6367b8347dad..ae1fa65bb26d 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -366,7 +366,7 @@ enum { CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) -#define CPU_FTRS_8XX (CPU_FTR_USE_TB) +#define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE) #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index d41200c01d85..f05500a29a60 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -27,6 +27,19 @@ #define MI_Ks 0x80000000 /* Should not be set */ #define MI_Kp 0x40000000 /* Should always be set */ +/* + * All pages' PP exec bits are set to 000, which means Execute for Supervisor + * and no Execute for User. + * Then we use the APG to say whether accesses are according to Page rules, + * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) + * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER + * 0 (00) => Not User, no exec => 11 (all accesses performed as user) + * 1 (01) => User but no exec => 11 (all accesses performed as user) + * 2 (10) => Not User, exec => 01 (rights according to page definition) + * 3 (11) => User, exec => 00 (all accesses performed as supervisor) + */ +#define MI_APG_INIT 0xf4ffffff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in * this register are used to create the TLB entry. @@ -87,6 +100,19 @@ #define MD_Ks 0x80000000 /* Should not be set */ #define MD_Kp 0x40000000 /* Should always be set */ +/* + * All pages' PP data bits are set to either 000 or 011, which means + * respectively RW for Supervisor and no access for User, or RO for + * Supervisor and no access for user. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) + * Therefore, we define 2 APG groups. lsb is _PAGE_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor + * according to page definition) + */ +#define MD_APG_INIT 0x4fffffff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in * this register are used to create the TLB entry. 
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index b82094e4c242..a0e2ba960976 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -39,8 +39,9 @@ */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ #define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ -#define _PAGE_ACCESSED 0x0040 /* software: page referenced */ +#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */ #define _PAGE_WRITETHRU 0x0080 /* software: caching is write through */ +#define _PAGE_ACCESSED 0x0800 /* software: page referenced */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c79184d86f58..78c1eba4c04a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -357,7 +357,7 @@ InstructionTLBMiss: lwz r10, 0(r10) /* Get the pte */ /* Insert the APG into the TWC from the Linux PTE. */ - rlwimi r11, r10, 0, 26, 26 + rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ @@ -449,6 +449,7 @@ DataStoreTLBMiss: */ li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + rlwimi r10, r11, 0, 20, 20 /* clear 20 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ @@ -769,15 +770,20 @@ initial_mmu: ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 mtspr SPRN_MD_EPN, r8 - li r8, MI_PS8MEG /* Set 8M byte page */ + li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 + li r8, MI_PS8MEG /* Set 8M byte page, APG 0 */ + ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ mtspr SPRN_MD_RPN, r8 - lis r8, MI_Kp@h /* Set the protection mode */ + lis r8, MI_APG_INIT@h /* Set protection modes */ + ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 + lis r8, MD_APG_INIT@h + ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 /* Map another 8 MByte at the IMMR to get the processor -- cgit v1.2.3 From 379caf606396a13c8da2742e9a6e0bcfaaffa726 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 17 May 2015 16:12:37 +0800 Subject: powerpc: mpc85xx: flush the l1 cache before cpu down in kexec We observe a "Zero PT_NOTE entries found" warning when vmcore_init() is running on the dump-capture kernel. Actually the PT_NOTE segments is not empty, but the entries generated by crash_save_cpu() are not flushed to the memory before we reset these cores. So we should flush the l1 cache as what we do in cpu hotplug. With this change, we can also kill the mpc85xx_smp_flush_dcache_kexec() since that becomes unnecessary. Please note: this only fix the issue on e500 core, we still need to implement the function to flush the l2 cache for the e500mc core. Fortunately we already had proposing patch for this support [1]. Hope we can fix this issue for e500mc after that merged. 
[1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-March/115830.html Signed-off-by: Kevin Hao Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/smp.c | 51 +-------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 8631ac5f0e57..b8b821697910 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -345,6 +345,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) local_irq_disable(); if (secondary) { + __flush_disable_L1(); atomic_inc(&kexec_down_cpus); /* loop forever */ while (1); @@ -357,61 +358,11 @@ static void mpc85xx_smp_kexec_down(void *arg) ppc_md.kexec_cpu_down(0,1); } -static void map_and_flush(unsigned long paddr) -{ - struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); - unsigned long kaddr = (unsigned long)kmap_atomic(page); - - flush_dcache_range(kaddr, kaddr + PAGE_SIZE); - kunmap_atomic((void *)kaddr); -} - -/** - * Before we reset the other cores, we need to flush relevant cache - * out to memory so we don't get anything corrupted, some of these flushes - * are performed out of an overabundance of caution as interrupts are not - * disabled yet and we can switch cores - */ -static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image) -{ - kimage_entry_t *ptr, entry; - unsigned long paddr; - int i; - - if (image->type == KEXEC_TYPE_DEFAULT) { - /* normal kexec images are stored in temporary pages */ - for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); - ptr = (entry & IND_INDIRECTION) ? - phys_to_virt(entry & PAGE_MASK) : ptr + 1) { - if (!(entry & IND_DESTINATION)) { - map_and_flush(entry); - } - } - /* flush out last IND_DONE page */ - map_and_flush(entry); - } else { - /* crash type kexec images are copied to the crash region */ - for (i = 0; i < image->nr_segments; i++) { - struct kexec_segment *seg = &image->segment[i]; - for (paddr = seg->mem; paddr < seg->mem + seg->memsz; - paddr += PAGE_SIZE) { - map_and_flush(paddr); - } - } - } - - /* also flush the kimage struct to be passed in as well */ - flush_dcache_range((unsigned long)image, - (unsigned long)image + sizeof(*image)); -} - static void mpc85xx_smp_machine_kexec(struct kimage *image) { int timeout = INT_MAX; int i, num_cpus = num_present_cpus(); - mpc85xx_smp_flush_dcache_kexec(image); - if (image->type == KEXEC_TYPE_DEFAULT) smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); -- cgit v1.2.3 From 31ea9d5dfd27b9aa6f92c207041eba8e81f4c497 Mon Sep 17 00:00:00 2001 From: Xie Xiaobo Date: Fri, 22 May 2015 13:07:19 +0800 Subject: powerpc/85xx: p1025twr: add module conditional to fix QE-uart issue A ioport setting was needed when used the QE uart function on TWR-P1025. Added a conditional definition to avoid missing this setting when the QE-uart driver was bulit to a module. 
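For reference, the difference between the two tests can be reproduced with a small host-side C sketch modelled on include/linux/kconfig.h (the macro bodies are simplified, and CONFIG_SERIAL_QE_MODULE is defined by hand here purely to simulate a =m build):

#include <stdio.h>

/* Modelled on include/linux/kconfig.h: for a tristate option built as a
 * module, kconfig defines CONFIG_FOO_MODULE rather than CONFIG_FOO, so a
 * plain defined(CONFIG_FOO) test misses the =m case, while IS_ENABLED()
 * is true for both =y and =m. */
#define CONFIG_SERIAL_QE_MODULE 1   /* simulate CONFIG_SERIAL_QE=m */

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x) ___is_defined(x)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define IS_BUILTIN(option) __is_defined(option)
#define IS_MODULE(option) __is_defined(option##_MODULE)
#define IS_ENABLED(option) (IS_BUILTIN(option) || IS_MODULE(option))

int main(void)
{
#if defined(CONFIG_SERIAL_QE)
        int old_test = 1;       /* what the pre-patch #if saw */
#else
        int old_test = 0;
#endif
        printf("defined(CONFIG_SERIAL_QE)    = %d\n", old_test);                       /* 0 */
        printf("IS_ENABLED(CONFIG_SERIAL_QE) = %d\n", IS_ENABLED(CONFIG_SERIAL_QE));   /* 1 */
        return 0;
}

With the option built as a module, only the IS_ENABLED() form keeps the ioport setup code compiled in.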
Signed-off-by: Xie Xiaobo Signed-off-by: Li Pengbo Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/twr_p102x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 1eadb6d0dc64..30e002f4648c 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -79,7 +79,7 @@ static void __init twr_p1025_setup_arch(void) mpc85xx_qe_init(); mpc85xx_qe_par_io_init(); -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE) +#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE) if (machine_is(twr_p1025)) { struct ccsr_guts __iomem *guts; @@ -101,7 +101,7 @@ static void __init twr_p1025_setup_arch(void) MPC85xx_PMUXCR_QE(12)); iounmap(guts); -#if defined(CONFIG_SERIAL_QE) +#if IS_ENABLED(CONFIG_SERIAL_QE) /* On P1025TWR board, the UCC7 acted as UART port. * However, The UCC7's CTS pin is low level in default, * it will impact the transmission in full duplex -- cgit v1.2.3 From 85a97da9584868b6cd0c54b93c5f65319f615b08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:55 +1000 Subject: powerpc/copro: Fix faulting kernel segments This fixes calculating the key bits (KP and KS) in the SLB VSID for kernel mappings. I'm not CCing this to stable as there are no uses of this currently. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f031a47d7701..7a71d7f81cf6 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(copro_handle_mm_fault); int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) { - u64 vsid; + u64 vsid, vsidkey; int psize, ssize; switch (REGION_ID(ea)) { @@ -109,6 +109,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + vsidkey = SLB_VSID_USER; break; case VMALLOC_REGION_ID: pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); @@ -118,19 +119,21 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = mmu_io_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; case KERNEL_REGION_ID: pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); psize = mmu_linear_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; default: pr_debug("%s: invalid region access at %016llx\n", __func__, ea); return 1; } - vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey; vsid |= mmu_psize_defs[psize].sllp | ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); -- cgit v1.2.3 From 5b64d2cc41216bef3e19913d1a39c324c2587d11 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 27 May 2015 16:06:56 +1000 Subject: powerpc/pci: Export symbols for CXL Export pcibios_claim_one_bus, pcibios_scan_phb and pcibios_alloc_controller. These will be used by the CXL driver. 
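As a rough illustration of the consumer side (a hypothetical module, not actual CXL code), the exported helpers let a PHB be created, scanned and claimed from module context; the prototypes match the functions touched below:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/pci.h>
#include <asm/pci-bridge.h>

/* Hypothetical consumer: bring up a PHB from a module. The three helpers
 * used here are exactly the ones exported by this patch. */
static int example_add_phb(struct device_node *np)
{
        struct pci_controller *phb;

        phb = pcibios_alloc_controller(np);     /* now callable from modules */
        if (!phb)
                return -ENOMEM;

        pcibios_scan_phb(phb);                  /* enumerate devices behind it */
        pcibios_claim_one_bus(phb->bus);        /* claim resources as they are */

        return 0;
}

MODULE_LICENSE("GPL");  /* needed: the exports are EXPORT_SYMBOL_GPL */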
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0d054068a21d..2040cd2f7358 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -89,6 +89,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) #endif return phb; } +EXPORT_SYMBOL_GPL(pcibios_alloc_controller); void pcibios_free_controller(struct pci_controller *phb) { @@ -1447,6 +1448,7 @@ void pcibios_claim_one_bus(struct pci_bus *bus) list_for_each_entry(child_bus, &bus->children, node) pcibios_claim_one_bus(child_bus); } +EXPORT_SYMBOL_GPL(pcibios_claim_one_bus); /* pcibios_finish_adding_to_bus @@ -1680,6 +1682,7 @@ void pcibios_scan_phb(struct pci_controller *hose) pcie_bus_configure_settings(child); } } +EXPORT_SYMBOL_GPL(pcibios_scan_phb); static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { -- cgit v1.2.3 From 10e796309a670bb2c13eae5aa6f60e10d2d6a6e1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:57 +1000 Subject: powerpc/pci: Add release_device() hook to phb ops Add release_device() hook to phb ops so we can clean up for specific phbs. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci-hotplug.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 6d17bb8498bf..4cf0caaa33c7 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -27,6 +27,8 @@ struct pci_controller_ops { * allow assignment/enabling of the device. */ bool (*enable_device_hook)(struct pci_dev *); + void (*release_device)(struct pci_dev *); + /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *, unsigned long type); void (*reset_secondary_bus)(struct pci_dev *dev); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 7ed85a69a9c2..7f9ed0c1f6b9 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -29,7 +29,12 @@ */ void pcibios_release_device(struct pci_dev *dev) { + struct pci_controller *phb = pci_bus_to_host(dev->bus); + eeh_remove_device(dev); + + if (phb->controller_ops.release_device) + phb->controller_ops.release_device(dev); } /** -- cgit v1.2.3 From f46580a5cf07b3d35e357bb2401ae3c49cff7e65 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:58 +1000 Subject: powerpc: Add cxl context to device archdata Add cxl context pointer to archdata. We'll want to create one of these for cxl PCI devices. Put them here until we can get a pci_dev specific private data. This location was suggested by benh. 
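A sketch of how the new field would typically be reached from a pci_dev follows; the accessor names are invented for illustration and are not part of this patch:

#include <linux/pci.h>

/* dev_archdata is embedded in every struct device, so the context can be
 * looked up from a pci_dev without any extra table (CONFIG_CXL_BASE only).
 * Hypothetical helpers, not from this patch. */
static inline struct cxl_context *example_get_cxl_ctx(struct pci_dev *pdev)
{
        return pdev->dev.archdata.cxl_ctx;
}

static inline void example_set_cxl_ctx(struct pci_dev *pdev,
                                       struct cxl_context *ctx)
{
        pdev->dev.archdata.cxl_ctx = ctx;
}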
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 9f1371bab5fc..e9bdda88f1fb 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -46,6 +46,9 @@ struct dev_archdata { #ifdef CONFIG_FAIL_IOMMU int fail_iommu; #endif +#ifdef CONFIG_CXL_BASE + struct cxl_context *cxl_ctx; +#endif }; struct pdev_archdata { -- cgit v1.2.3 From 7a8e6bbf8593a9395dd6c61f7c5f421570600017 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:59 +1000 Subject: powerpc/pci: Add shutdown hook to pci_controller_ops Currently pnv_pci_shutdown() calls the PHB shutdown code for all PHBs in the system. It dereferences the private_data assuming it's a powernv PHB, which won't be the case when we have different PHB in the systems (like when we add vPHBs for CXL). This moves the shutdown hook to the pci_controller_ops and fixes the call site to use that instead. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/platforms/powernv/pci-ioda.c | 8 ++++---- arch/powerpc/platforms/powernv/pci.c | 9 +++------ arch/powerpc/platforms/powernv/pci.h | 1 - 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 4cf0caaa33c7..3947749de280 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -40,6 +40,8 @@ struct pci_controller_ops { #endif int (*dma_set_mask)(struct pci_dev *dev, u64 dma_mask); + + void (*shutdown)(struct pci_controller *); }; /* diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 508f530e367b..cec43c8dc44d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2643,8 +2643,10 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus, return phb->ioda.pe_rmap[(bus->number << 8) | devfn]; } -static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) +static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { + struct pnv_phb *phb = hose->private_data; + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); } @@ -2659,6 +2661,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .shutdown = pnv_pci_ioda_shutdown, }; static void __init pnv_pci_init_ioda_phb(struct device_node *np, @@ -2806,9 +2809,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup; phb->dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask; - /* Setup shutdown function for kexec */ - phb->shutdown = pnv_pci_ioda_shutdown; - /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 8a557b5aabf7..3c35487e9778 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -704,12 +704,9 @@ void pnv_pci_shutdown(void) { struct pci_controller *hose; - list_for_each_entry(hose, &hose_list, list_node) { - struct pnv_phb *phb = hose->private_data; - - if (phb && 
phb->shutdown) - phb->shutdown(phb); - } + list_for_each_entry(hose, &hose_list, list_node) + if (hose->controller_ops.shutdown) + hose->controller_ops.shutdown(hose); } /* Fixup wrong class code in p7ioc and p8 root complex */ diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index ac8686c853e6..714ee3c19854 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -110,7 +110,6 @@ struct pnv_phb { struct pci_dev *pdev); void (*fixup_phb)(struct pci_controller *hose); u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn); - void (*shutdown)(struct pnv_phb *phb); int (*init_m64)(struct pnv_phb *phb); void (*reserve_m64_pe)(struct pnv_phb *phb); int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all); -- cgit v1.2.3 From abeeed6d3d9948b3235a5bb77759d8c1f84de39d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:07:00 +1000 Subject: powerpc/pci: Add pcibios_disable_device() hook This adds a hook into the powerpc pci code for pci_disable_device() calls. The generic code already provides a weak pcibios_disable_device() symbol, so we just need to provide our own in powerpc and it'll get picked up. This is passed directly to the phb controller ops, provided one exists. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 ++ arch/powerpc/kernel/pci-common.c | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 3947749de280..b76cd5682a8c 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -27,6 +27,8 @@ struct pci_controller_ops { * allow assignment/enabling of the device. */ bool (*enable_device_hook)(struct pci_dev *); + void (*disable_device)(struct pci_dev *); + void (*release_device)(struct pci_dev *); /* Called during PCI resource reassignment */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2040cd2f7358..b9de34d44fcb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1490,6 +1490,14 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return pci_enable_resources(dev, mask); } +void pcibios_disable_device(struct pci_dev *dev) +{ + struct pci_controller *phb = pci_bus_to_host(dev->bus); + + if (phb->controller_ops.disable_device) + phb->controller_ops.disable_device(dev); +} + resource_size_t pcibios_io_space_offset(struct pci_controller *hose) { return (unsigned long) hose->io_base_virt - _IO_BASE; -- cgit v1.2.3 From ec249dd860ed88e15b3e2bd363cbfc76ba8c1884 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:07:16 +1000 Subject: cxl: Move include file cxl.h -> cxl-base.h This moves the current include file from cxl.h -> cxl-base.h. This current include file is used only to pass information between the base driver that needs to be built into the kernel and the cxl module. 
This is to make way for a new include/misc/cxl.h which will contain just the kernel API for other driver to use Signed-off-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- MAINTAINERS | 2 +- arch/powerpc/include/asm/pnv-pci.h | 2 +- arch/powerpc/mm/copro_fault.c | 2 +- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- drivers/misc/cxl/base.c | 2 +- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/irq.c | 2 +- drivers/misc/cxl/main.c | 2 +- drivers/misc/cxl/native.c | 2 +- include/misc/cxl-base.h | 48 +++++++++++++++++++++++++++++++ include/misc/cxl.h | 48 ------------------------------- 12 files changed, 58 insertions(+), 58 deletions(-) create mode 100644 include/misc/cxl-base.h delete mode 100644 include/misc/cxl.h (limited to 'arch/powerpc') diff --git a/MAINTAINERS b/MAINTAINERS index 562ae4ef85c8..80627ddba3e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2946,7 +2946,7 @@ M: Michael Neuling L: linuxppc-dev@lists.ozlabs.org S: Supported F: drivers/misc/cxl/ -F: include/misc/cxl.h +F: include/misc/cxl* F: include/uapi/misc/cxl.h F: Documentation/powerpc/cxl.txt F: Documentation/powerpc/cxl.txt diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index f9b498292a5c..6f77f71ee964 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -11,7 +11,7 @@ #define _ASM_PNV_PCI_H #include -#include +#include int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 7a71d7f81cf6..6527882ce05e 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 9c4880ddecd6..13befa35d8a8 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,7 +29,7 @@ #include #include -#include +#include #ifdef DEBUG_LOW #define DBG_LOW(fmt...) 
udbg_printf(fmt) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cec43c8dc44d..b2841853550a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include "powernv.h" #include "pci.h" diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 0654ad83675e..a9f0dd3255a2 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "cxl.h" /* protected by rcu */ diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 778161afd0a3..46d806e3b943 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 212790b4ee57..680cd263436d 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "cxl.h" #include "trace.h" diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 8ccddceead66..833348e2c9cb 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "cxl.h" #include "trace.h" diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index a4b40d72466a..10567f245818 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "cxl.h" #include "trace.h" diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h new file mode 100644 index 000000000000..5ae962512fb8 --- /dev/null +++ b/include/misc/cxl-base.h @@ -0,0 +1,48 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _MISC_CXL_BASE_H +#define _MISC_CXL_BASE_H + +#ifdef CONFIG_CXL_BASE + +#define CXL_IRQ_RANGES 4 + +struct cxl_irq_ranges { + irq_hw_number_t offset[CXL_IRQ_RANGES]; + irq_hw_number_t range[CXL_IRQ_RANGES]; +}; + +extern atomic_t cxl_use_count; + +static inline bool cxl_ctx_in_use(void) +{ + return (atomic_read(&cxl_use_count) != 0); +} + +static inline void cxl_ctx_get(void) +{ + atomic_inc(&cxl_use_count); +} + +static inline void cxl_ctx_put(void) +{ + atomic_dec(&cxl_use_count); +} + +void cxl_slbia(struct mm_struct *mm); + +#else /* CONFIG_CXL_BASE */ + +static inline bool cxl_ctx_in_use(void) { return false; } +static inline void cxl_slbia(struct mm_struct *mm) {} + +#endif /* CONFIG_CXL_BASE */ + +#endif diff --git a/include/misc/cxl.h b/include/misc/cxl.h deleted file mode 100644 index 975cc7861f18..000000000000 --- a/include/misc/cxl.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2014 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#ifndef _MISC_CXL_H -#define _MISC_CXL_H - -#ifdef CONFIG_CXL_BASE - -#define CXL_IRQ_RANGES 4 - -struct cxl_irq_ranges { - irq_hw_number_t offset[CXL_IRQ_RANGES]; - irq_hw_number_t range[CXL_IRQ_RANGES]; -}; - -extern atomic_t cxl_use_count; - -static inline bool cxl_ctx_in_use(void) -{ - return (atomic_read(&cxl_use_count) != 0); -} - -static inline void cxl_ctx_get(void) -{ - atomic_inc(&cxl_use_count); -} - -static inline void cxl_ctx_put(void) -{ - atomic_dec(&cxl_use_count); -} - -void cxl_slbia(struct mm_struct *mm); - -#else /* CONFIG_CXL_BASE */ - -static inline bool cxl_ctx_in_use(void) { return false; } -static inline void cxl_slbia(struct mm_struct *mm) {} - -#endif /* CONFIG_CXL_BASE */ - -#endif -- cgit v1.2.3 From 14aae78f084af5241476ede8e28b377fc751b191 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 8 Apr 2015 09:31:10 +0200 Subject: powerpc/powernv: convert OPAL codes returned by sysparam calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The opal_{get,set}_param calls return internal error codes which need to be translated in errnos in Linux. Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-sysparam.c | 12 ++++++++---- arch/powerpc/platforms/powernv/opal.c | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 2e52b47393e7..afe66c576a38 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -55,8 +55,10 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) } ret = opal_get_param(token, param_id, (u64)buffer, length); - if (ret != OPAL_ASYNC_COMPLETION) + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); goto out_token; + } ret = opal_async_wait_response(token, &msg); if (ret) { @@ -65,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = be64_to_cpu(msg.params[1]); + ret = opal_error_code(be64_to_cpu(msg.params[1])); out_token: opal_async_release_token(token); @@ -89,8 +91,10 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) ret = opal_set_param(token, param_id, (u64)buffer, length); - if (ret != OPAL_ASYNC_COMPLETION) + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); goto out_token; + } ret = opal_async_wait_response(token, &msg); if (ret) { @@ -99,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = be64_to_cpu(msg.params[1]); + ret = opal_error_code(be64_to_cpu(msg.params[1])); out_token: opal_async_release_token(token); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 8403307c5362..fd694bd51b80 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -830,6 +830,7 @@ int opal_error_code(int rc) case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; case OPAL_BUSY_EVENT: return -EBUSY; case OPAL_NO_MEM: return -ENOMEM; + case OPAL_PERMISSION: return -EPERM; case OPAL_UNSUPPORTED: return -EIO; case OPAL_HARDWARE: return -EIO; -- cgit v1.2.3 From 18725226af7a6a0eac8a5bc42a6c9c90af1fb0e7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 13 May 2015 15:13:18 +1000 Subject: powerpc/config: Enable bnx2x on ppc64 and pseries defconfigs Signed-off-by: Anton Blanchard Signed-off-by: 
Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index aad501ae3834..a97efc2146fd 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -155,6 +155,7 @@ CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=y CONFIG_TIGON3=y +CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_S2IO=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 4da826004ef8..0d9efcedaf34 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -154,6 +154,7 @@ CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y CONFIG_PCNET32=y CONFIG_TIGON3=y +CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m CONFIG_BE2NET=m CONFIG_S2IO=m -- cgit v1.2.3 From 48c06154952a18c5cd4d074ec3de627430cb6d23 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 20 May 2015 11:23:33 +0800 Subject: powerpc/powernv: Merge common platform device initialisation opal_ipmi_init and opal_flash_init are equivalent, except for the compatbile string. Merge these two into a common opal_pdev_init function. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index fd694bd51b80..fdce840f0006 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -600,21 +600,13 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_flash_init(struct device_node *opal_node) +static void opal_pdev_init(struct device_node *opal_node, + const char *compatible) { struct device_node *np; for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, "ibm,opal-flash")) - of_platform_device_create(np, NULL, NULL); -} - -static void opal_ipmi_init(struct device_node *opal_node) -{ - struct device_node *np; - - for_each_child_of_node(opal_node, np) - if (of_device_is_compatible(np, "ibm,opal-ipmi")) + if (of_device_is_compatible(np, compatible)) of_platform_device_create(np, NULL, NULL); } @@ -717,10 +709,9 @@ static int __init opal_init(void) opal_msglog_init(); } - /* Initialize OPAL IPMI backend */ - opal_ipmi_init(opal_node); - - opal_flash_init(opal_node); + /* Initialize platform devices: IPMI backend & flash interface */ + opal_pdev_init(opal_node, "ibm,opal-ipmi"); + opal_pdev_init(opal_node, "ibm,opal-flash"); return 0; } -- cgit v1.2.3 From 594fcb9ec9e17215b4f98b5d40ba5e3af618ba82 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 20 May 2015 11:23:33 +0800 Subject: powerpc/powernv: Expose OPAL APIs required by PRD interface The (upcoming) opal-prd driver needs to access the message notifier and xscom code, so add EXPORT_SYMBOL_GPL macros for these. 
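A minimal sketch of a modular consumer of these exports (a hypothetical module, not the opal-prd driver itself; the SCOM chip id and address below are placeholders):

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/types.h>
#include <asm/opal.h>

/* Hypothetical consumer of the newly exported interfaces. */
static int demo_opal_msg(struct notifier_block *nb, unsigned long type,
                         void *msg)
{
        pr_info("OPAL message of type %lu received\n", type);
        return 0;
}

static struct notifier_block demo_nb = { .notifier_call = demo_opal_msg };

static int __init demo_init(void)
{
        __be64 val;
        int64_t rc;

        /* Subscribe to one of the existing message types. */
        opal_message_notifier_register(OPAL_MSG_SHUTDOWN, &demo_nb);

        /* Read a SCOM register; chip id and address are placeholders. */
        rc = opal_xscom_read(0, 0xf000f, &val);
        if (rc == OPAL_SUCCESS)
                pr_info("xscom read: %016llx\n",
                        (unsigned long long)be64_to_cpu(val));

        return 0;
}

static void __exit demo_exit(void)
{
        opal_message_notifier_unregister(OPAL_MSG_SHUTDOWN, &demo_nb);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");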
Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index fdce840f0006..0379068e1c10 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -235,6 +235,7 @@ int opal_message_notifier_register(enum opal_msg_type msg_type, return atomic_notifier_chain_register( &opal_msg_notifier_head[msg_type], nb); } +EXPORT_SYMBOL_GPL(opal_message_notifier_register); int opal_message_notifier_unregister(enum opal_msg_type msg_type, struct notifier_block *nb) @@ -242,6 +243,7 @@ int opal_message_notifier_unregister(enum opal_msg_type msg_type, return atomic_notifier_chain_unregister( &opal_msg_notifier_head[msg_type], nb); } +EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); static void opal_message_do_notify(uint32_t msg_type, void *msg) { @@ -743,6 +745,8 @@ void opal_shutdown(void) /* Export this so that test modules can use it */ EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_xscom_read); +EXPORT_SYMBOL_GPL(opal_xscom_write); EXPORT_SYMBOL_GPL(opal_ipmi_send); EXPORT_SYMBOL_GPL(opal_ipmi_recv); EXPORT_SYMBOL_GPL(opal_flash_read); -- cgit v1.2.3 From 0d7cd8550d30906c7461ced654306da30f1590e2 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 4 Jun 2015 21:51:47 +0800 Subject: powerpc/powernv: Add opal-prd channel This change adds a char device to access the "PRD" (processor runtime diagnostics) channel to OPAL firmware. Includes contributions from Vaidyanathan Srinivasan, Neelesh Gupta & Vishal Kulkarni. Signed-off-by: Neelesh Gupta Signed-off-by: Jeremy Kerr Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 21 +- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/include/uapi/asm/opal-prd.h | 58 ++++ arch/powerpc/platforms/powernv/Kconfig | 7 + arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/opal-prd.c | 444 +++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 4 +- 8 files changed, 535 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/opal-prd.h create mode 100644 arch/powerpc/platforms/powernv/opal-prd.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index a49e5fa6f939..e9e4c52f3685 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -153,7 +153,8 @@ #define OPAL_FLASH_READ 110 #define OPAL_FLASH_WRITE 111 #define OPAL_FLASH_ERASE 112 -#define OPAL_LAST 112 +#define OPAL_PRD_MSG 113 +#define OPAL_LAST 113 /* Device tree flags */ @@ -359,6 +360,7 @@ enum opal_msg_type { OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */ OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, + OPAL_MSG_PRD, OPAL_MSG_TYPE_MAX, }; @@ -681,6 +683,23 @@ typedef struct oppanel_line { __be64 line_len; } oppanel_line_t; +enum opal_prd_msg_type { + OPAL_PRD_MSG_TYPE_INIT = 0, /* HBRT --> OPAL */ + OPAL_PRD_MSG_TYPE_FINI, /* HBRT/kernel --> OPAL */ + OPAL_PRD_MSG_TYPE_ATTN, /* HBRT <-- OPAL */ + OPAL_PRD_MSG_TYPE_ATTN_ACK, /* HBRT --> OPAL */ + OPAL_PRD_MSG_TYPE_OCC_ERROR, /* HBRT <-- OPAL */ + OPAL_PRD_MSG_TYPE_OCC_RESET, /* HBRT <-- OPAL */ +}; + +struct opal_prd_msg_header { + uint8_t type; + uint8_t pad[1]; + __be16 size; +}; + +struct opal_prd_msg; + /* * SG entries * diff --git 
a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 1412814347ba..958e941c0cda 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -194,6 +194,7 @@ int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, uint64_t *msg_len); int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id, struct opal_i2c_request *oreq); +int64_t opal_prd_msg(struct opal_prd_msg *msg); int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf, uint64_t size, uint64_t token); diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h new file mode 100644 index 000000000000..319ff4a26158 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/opal-prd.h @@ -0,0 +1,58 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * (C) Copyright IBM 2015 + * + * Author: Vaidyanathan Srinivasan + * Author: Jeremy Kerr + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_ +#define _UAPI_ASM_POWERPC_OPAL_PRD_H_ + +#include + +/** + * The version of the kernel interface of the PRD system. This describes the + * interface available for the /dev/opal-prd device. The actual PRD message + * layout and content is private to the firmware <--> userspace interface, so + * is not covered by this versioning. + * + * Future interface versions are backwards-compatible; if a later kernel + * version is encountered, functionality provided in earlier versions + * will work. 
+ */ +#define OPAL_PRD_KERNEL_VERSION 1 + +#define OPAL_PRD_GET_INFO _IOR('o', 0x01, struct opal_prd_info) +#define OPAL_PRD_SCOM_READ _IOR('o', 0x02, struct opal_prd_scom) +#define OPAL_PRD_SCOM_WRITE _IOW('o', 0x03, struct opal_prd_scom) + +#ifndef __ASSEMBLY__ + +struct opal_prd_info { + __u64 version; + __u64 reserved[3]; +}; + +struct opal_prd_scom { + __u64 chip; + __u64 addr; + __u64 data; + __s64 rc; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _UAPI_ASM_POWERPC_OPAL_PRD_H */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 4b044d8cb49a..604190cab522 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -19,3 +19,10 @@ config PPC_POWERNV select CPU_FREQ_GOV_CONSERVATIVE select PPC_DOORBELL default y + +config OPAL_PRD + tristate 'OPAL PRD driver' + depends on PPC_POWERNV + help + This enables the opal-prd driver, a facility to run processor + recovery diagnostics on OpenPower machines diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 26bd7e417b7c..1c8cdb6250e7 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o +obj-$(CONFIG_OPAL_PRD) += opal-prd.o diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c new file mode 100644 index 000000000000..d9e72bde79f5 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -0,0 +1,444 @@ +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * Copyright IBM Corporation 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) "opal-prd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +struct opal_prd_msg_queue_item { + struct opal_prd_msg_header msg; + struct list_head list; +}; + +static struct device_node *prd_node; +static LIST_HEAD(opal_prd_msg_queue); +static DEFINE_SPINLOCK(opal_prd_msg_queue_lock); +static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait); +static atomic_t prd_usage; + +static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size) +{ + struct device_node *parent, *node; + bool found; + + if (addr + size < addr) + return false; + + parent = of_find_node_by_path("/reserved-memory"); + if (!parent) + return false; + + found = false; + + for_each_child_of_node(parent, node) { + uint64_t range_addr, range_size, range_end; + const __be32 *addrp; + const char *label; + + addrp = of_get_address(node, 0, &range_size, NULL); + + range_addr = of_read_number(addrp, 2); + range_end = range_addr + range_size; + + label = of_get_property(node, "ibm,prd-label", NULL); + + /* PRD ranges need a label */ + if (!label) + continue; + + if (range_end <= range_addr) + continue; + + if (addr >= range_addr && addr + size <= range_end) { + found = true; + of_node_put(node); + break; + } + } + + of_node_put(parent); + return found; +} + +static int opal_prd_open(struct inode *inode, struct file *file) +{ + /* + * Prevent multiple (separate) processes from concurrent interactions + * with the FW PRD channel + */ + if (atomic_xchg(&prd_usage, 1) == 1) + return -EBUSY; + + return 0; +} + +/* + * opal_prd_mmap - maps firmware-provided ranges into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + */ + +static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t addr, size; + int rc; + + pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_flags); + + addr = vma->vm_pgoff << PAGE_SHIFT; + size = vma->vm_end - vma->vm_start; + + /* ensure we're mapping within one of the allowable ranges */ + if (!opal_prd_range_is_valid(addr, size)) + return -EINVAL; + + vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, + vma->vm_pgoff, + size, vma->vm_page_prot)) + | _PAGE_SPECIAL); + + rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + vma->vm_page_prot); + + return rc; +} + +static bool opal_msg_queue_empty(void) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + ret = list_empty(&opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + return ret; +} + +static unsigned int opal_prd_poll(struct file *file, + struct poll_table_struct *wait) +{ + poll_wait(file, &opal_prd_msg_wait, wait); + + if (!opal_msg_queue_empty()) + return POLLIN | POLLRDNORM; + + return 0; +} + +static ssize_t opal_prd_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_queue_item *item; + unsigned long flags; + ssize_t size, err; + int rc; + + /* we need at least a header's worth of data */ + if (count < sizeof(item->msg)) + return -EINVAL; + + if (*ppos) + return -ESPIPE; + + item = NULL; + + for (;;) { + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + if (!list_empty(&opal_prd_msg_queue)) { + item = list_first_entry(&opal_prd_msg_queue, + struct opal_prd_msg_queue_item, list); + list_del(&item->list); + } + 
spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + if (item) + break; + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(opal_prd_msg_wait, + !opal_msg_queue_empty()); + if (rc) + return -EINTR; + } + + size = be16_to_cpu(item->msg.size); + if (size > count) { + err = -EINVAL; + goto err_requeue; + } + + rc = copy_to_user(buf, &item->msg, size); + if (rc) { + err = -EFAULT; + goto err_requeue; + } + + kfree(item); + + return size; + +err_requeue: + /* eep! re-queue at the head of the list */ + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + return err; +} + +static ssize_t opal_prd_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_header hdr; + ssize_t size; + void *msg; + int rc; + + size = sizeof(hdr); + + if (count < size) + return -EINVAL; + + /* grab the header */ + rc = copy_from_user(&hdr, buf, sizeof(hdr)); + if (rc) + return -EFAULT; + + size = be16_to_cpu(hdr.size); + + msg = kmalloc(size, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = copy_from_user(msg, buf, size); + if (rc) { + size = -EFAULT; + goto out_free; + } + + rc = opal_prd_msg(msg); + if (rc) { + pr_warn("write: opal_prd_msg returned %d\n", rc); + size = -EIO; + } + +out_free: + kfree(msg); + + return size; +} + +static int opal_prd_release(struct inode *inode, struct file *file) +{ + struct opal_prd_msg_header msg; + + msg.size = cpu_to_be16(sizeof(msg)); + msg.type = OPAL_PRD_MSG_TYPE_FINI; + + opal_prd_msg((struct opal_prd_msg *)&msg); + + atomic_xchg(&prd_usage, 0); + + return 0; +} + +static long opal_prd_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct opal_prd_info info; + struct opal_prd_scom scom; + int rc = 0; + + switch (cmd) { + case OPAL_PRD_GET_INFO: + memset(&info, 0, sizeof(info)); + info.version = OPAL_PRD_KERNEL_VERSION; + rc = copy_to_user((void __user *)param, &info, sizeof(info)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_READ: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_read(scom.chip, scom.addr, + (__be64 *)&scom.data); + scom.data = be64_to_cpu(scom.data); + pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_WRITE: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data); + pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + default: + rc = -EINVAL; + } + + return rc; +} + +static const struct file_operations opal_prd_fops = { + .open = opal_prd_open, + .mmap = opal_prd_mmap, + .poll = opal_prd_poll, + .read = opal_prd_read, + .write = opal_prd_write, + .unlocked_ioctl = opal_prd_ioctl, + .release = opal_prd_release, + .owner = THIS_MODULE, +}; + +static struct miscdevice opal_prd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "opal-prd", + .fops = &opal_prd_fops, +}; + +/* opal interface */ +static int opal_prd_msg_notifier(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + 
struct opal_prd_msg_queue_item *item; + struct opal_prd_msg_header *hdr; + struct opal_msg *msg = _msg; + unsigned long flags; + int size; + + if (msg_type != OPAL_MSG_PRD) + return 0; + + /* Calculate total size of the item we need to store. The 'size' field + * in the header includes the header itself. */ + hdr = (void *)msg->params; + size = (sizeof(*item) - sizeof(item->msg)) + be16_to_cpu(hdr->size); + + item = kzalloc(size, GFP_ATOMIC); + if (!item) + return -ENOMEM; + + memcpy(&item->msg, msg->params, size); + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add_tail(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + wake_up_interruptible(&opal_prd_msg_wait); + + return 0; +} + +static struct notifier_block opal_prd_event_nb = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + +static int opal_prd_probe(struct platform_device *pdev) +{ + int rc; + + if (!pdev || !pdev->dev.of_node) + return -ENODEV; + + /* We should only have one prd driver instance per machine; ensure + * that we only get a valid probe on a single OF node. + */ + if (prd_node) + return -EBUSY; + + prd_node = pdev->dev.of_node; + + rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb); + if (rc) { + pr_err("Couldn't register event notifier\n"); + return rc; + } + + rc = misc_register(&opal_prd_dev); + if (rc) { + pr_err("failed to register miscdev\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, + &opal_prd_event_nb); + return rc; + } + + return 0; +} + +static int opal_prd_remove(struct platform_device *pdev) +{ + misc_deregister(&opal_prd_dev); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + return 0; +} + +static const struct of_device_id opal_prd_match[] = { + { .compatible = "ibm,opal-prd" }, + { }, +}; + +static struct platform_driver opal_prd_driver = { + .driver = { + .name = "opal-prd", + .owner = THIS_MODULE, + .of_match_table = opal_prd_match, + }, + .probe = opal_prd_probe, + .remove = opal_prd_remove, +}; + +module_platform_driver(opal_prd_driver); + +MODULE_DEVICE_TABLE(of, opal_prd_match); +MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index bf15ead00e12..d6a7b8252e4d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -296,3 +296,4 @@ OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); +OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0379068e1c10..9e9c483eee4d 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -711,9 +711,10 @@ static int __init opal_init(void) opal_msglog_init(); } - /* Initialize platform devices: IPMI backend & flash interface */ + /* Initialize platform devices: IPMI backend, PRD & flash interface */ opal_pdev_init(opal_node, "ibm,opal-ipmi"); opal_pdev_init(opal_node, "ibm,opal-flash"); + opal_pdev_init(opal_node, "ibm,opal-prd"); return 0; } @@ -752,6 +753,7 @@ EXPORT_SYMBOL_GPL(opal_ipmi_recv); EXPORT_SYMBOL_GPL(opal_flash_read); EXPORT_SYMBOL_GPL(opal_flash_write); EXPORT_SYMBOL_GPL(opal_flash_erase); +EXPORT_SYMBOL_GPL(opal_prd_msg); /* Convert a region of vmalloc 
memory to an opal sg list */ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, -- cgit v1.2.3 From 63a4aea556704acba1529df8f896ed65b93e66c1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 1 Jun 2015 08:42:48 -0500 Subject: of: clean-up unnecessary libfdt include paths With the libfdt include fixups to use "" instead of <> in the latest dtc import in commit 4760597 (scripts/dtc: Update to upstream version 9d3649bd3be245c9), it is no longer necessary to add explicit include paths to use libfdt. Remove these across the kernel. Signed-off-by: Rob Herring Acked-by: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Michael Ellerman Acked-by: Grant Likely Cc: linux-mips@linux-mips.org Cc: linuxppc-dev@lists.ozlabs.org --- arch/mips/cavium-octeon/Makefile | 3 --- arch/mips/mti-sead3/Makefile | 2 -- arch/powerpc/kernel/Makefile | 1 - drivers/firmware/efi/libstub/Makefile | 2 -- drivers/of/Makefile | 3 --- 5 files changed, 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile index 69a8a8dabc2b..2a5926578841 100644 --- a/arch/mips/cavium-octeon/Makefile +++ b/arch/mips/cavium-octeon/Makefile @@ -9,9 +9,6 @@ # Copyright (C) 2005-2009 Cavium Networks # -CFLAGS_octeon-platform.o = -I$(src)/../../../scripts/dtc/libfdt -CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt - obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o obj-y += dma-octeon.o obj-y += octeon-memcpy.o diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile index ecd71db6258b..2e52cbd20ceb 100644 --- a/arch/mips/mti-sead3/Makefile +++ b/arch/mips/mti-sead3/Makefile @@ -15,5 +15,3 @@ obj-y := sead3-lcd.o sead3-display.o sead3-init.o \ obj-y += leds-sead3.o obj-$(CONFIG_EARLY_PRINTK) += sead3-console.o - -CFLAGS_sead3-setup.o = -I$(src)/../../../scripts/dtc/libfdt diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c1ebbdaac28f..c16e8362432b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -2,7 +2,6 @@ # Makefile for the linux kernel. # -CFLAGS_prom.o = -I$(src)/../../../scripts/dtc/libfdt CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 280bc0a63365..816dbe9f4b82 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -24,8 +24,6 @@ KASAN_SANITIZE := n lib-y := efi-stub-helper.o lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o -CFLAGS_fdt.o += -I$(srctree)/scripts/dtc/libfdt/ - # # arm64 puts the stub in the kernel proper, which will unnecessarily retain all # code indefinitely unless it is annotated as __init/__initdata/__initconst etc. 
diff --git a/drivers/of/Makefile b/drivers/of/Makefile index fcacb186a67b..156c072b3117 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -16,6 +16,3 @@ obj-$(CONFIG_OF_RESOLVE) += resolver.o obj-$(CONFIG_OF_OVERLAY) += overlay.o obj-$(CONFIG_OF_UNITTEST) += unittest-data/ - -CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt -CFLAGS_fdt_address.o = -I$(src)/../../scripts/dtc/libfdt -- cgit v1.2.3 From f1b3b4450dcbfd0098b6fc3b00f4880701550c83 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 26 May 2015 12:55:51 +0200 Subject: powerpc/85xx: Replace CONFIG_USB_ISP1760_HCD by CONFIG_USB_ISP1760 Since commit 100832abf065bc18 ("usb: isp1760: Make HCD support optional"), CONFIG_USB_ISP1760_HCD is automatically selected when needed. Enabling that option in the defconfig is now a no-op, and no longer enables ISP1760 HCD support. Re-enable the ISP1760 driver in the defconfig by enabling USB_ISP1760_HOST_ROLE instead. Signed-off-by: Geert Uytterhoeven Signed-off-by: Scott Wood --- arch/powerpc/configs/85xx/xes_mpc85xx_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 34f3ea1729e0..858b539d004b 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -108,7 +108,7 @@ CONFIG_SENSORS_LM90=y CONFIG_WATCHDOG=y CONFIG_USB=y CONFIG_USB_MON=y -CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_ISP1760=y CONFIG_USB_STORAGE=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y -- cgit v1.2.3 From f481b069e674378758c73761827e83ab05c46b52 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 17:30:37 +0200 Subject: KVM: implement multiple address spaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only two ioctls have to be modified; the address space id is placed in the higher 16 bits of their slot id argument. As of this patch, no architecture defines more than one address space; x86 will be the first. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 12 ++++++ arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- include/linux/kvm_host.h | 26 ++++++++++-- include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 70 ++++++++++++++++++++------------ 5 files changed, 79 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 2ddefd58b1aa..461956a0ee8e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -254,6 +254,11 @@ since the last call to this ioctl. Bit 0 is the first page in the memory slot. Ensure the entire structure is cleared to avoid padding issues. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies +the address space for which you want to return the dirty bitmap. +They must be less than the value that KVM_CHECK_EXTENSION returns for +the KVM_CAP_MULTI_ADDRESS_SPACE capability. + 4.9 KVM_SET_MEMORY_ALIAS @@ -924,6 +929,13 @@ slot. When changing an existing slot, it may be moved in the guest physical memory space, or its flags may be modified. It may not be resized. Slots may not overlap in guest physical address space. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" +specifies the address space which is being modified. They must be +less than the value that KVM_CHECK_EXTENSION returns for the +KVM_CAP_MULTI_ADDRESS_SPACE capability. 
Slots in separate address spaces +are unrelated; the restriction on overlapping slots only applies within +each address space. + Memory for the region is taken starting at the address denoted by the field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 3536d12eb798..2aa79c864e91 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -430,7 +430,7 @@ static inline void note_hpte_modification(struct kvm *kvm, */ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) { - return rcu_dereference_raw_notrace(kvm->memslots); + return rcu_dereference_raw_notrace(kvm->memslots[0]); } extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ba1ea43998e4..9564fd78c547 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -44,6 +44,10 @@ /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 +#ifndef KVM_ADDRESS_SPACE_NUM +#define KVM_ADDRESS_SPACE_NUM 1 +#endif + /* * For the normal pfn, the highest 12 bits should be zero, * so we can mask bit 62 ~ bit 52 to indicate the error pfn, @@ -331,6 +335,13 @@ struct kvm_kernel_irq_routing_entry { #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) #endif +#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) +{ + return 0; +} +#endif + /* * Note: * memslots are not sorted by id anymore, please use id_to_memslot() @@ -349,7 +360,7 @@ struct kvm { spinlock_t mmu_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots; + struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; struct srcu_struct srcu; struct srcu_struct irq_srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE @@ -464,16 +475,23 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - return rcu_dereference_check(kvm->memslots, + return rcu_dereference_check(kvm->memslots[as_id], srcu_read_lock_held(&kvm->srcu) || lockdep_is_held(&kvm->slots_lock)); } +static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +{ + return __kvm_memslots(kvm, 0); +} + static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) { - return kvm_memslots(vcpu->kvm); + int as_id = kvm_arch_vcpu_memslots_id(vcpu); + + return __kvm_memslots(vcpu->kvm, as_id); } static inline struct kvm_memory_slot * diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eace8babd227..5ff1038437e3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -816,6 +816,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_HWRNG 115 #define KVM_CAP_DISABLE_QUIRKS 116 #define KVM_CAP_X86_SMM 117 +#define KVM_CAP_MULTI_ADDRESS_SPACE 118 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a121cedcc77..848af90b8091 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -518,9 +518,11 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kvm_alloc_memslots(); - if (!kvm->memslots) - goto out_err_no_srcu; + for (i = 0; i < 
KVM_ADDRESS_SPACE_NUM; i++) { + kvm->memslots[i] = kvm_alloc_memslots(); + if (!kvm->memslots[i]) + goto out_err_no_srcu; + } if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; @@ -562,7 +564,8 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kvm_free_memslots(kvm, kvm->memslots); + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) + kvm_free_memslots(kvm, kvm->memslots[i]); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -612,7 +615,8 @@ static void kvm_destroy_vm(struct kvm *kvm) #endif kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); - kvm_free_memslots(kvm, kvm->memslots); + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) + kvm_free_memslots(kvm, kvm->memslots[i]); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -729,9 +733,9 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m } static struct kvm_memslots *install_new_memslots(struct kvm *kvm, - struct kvm_memslots *slots) + int as_id, struct kvm_memslots *slots) { - struct kvm_memslots *old_memslots = kvm_memslots(kvm); + struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); /* * Set the low bit in the generation, which disables SPTE caching @@ -740,7 +744,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, WARN_ON(old_memslots->generation & 1); slots->generation = old_memslots->generation + 1; - rcu_assign_pointer(kvm->memslots, slots); + rcu_assign_pointer(kvm->memslots[as_id], slots); synchronize_srcu_expedited(&kvm->srcu); /* @@ -772,6 +776,7 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_memory_slot *slot; struct kvm_memory_slot old, new; struct kvm_memslots *slots = NULL, *old_memslots; + int as_id, id; enum kvm_mr_change change; r = check_memory_region_flags(mem); @@ -779,24 +784,27 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; r = -EINVAL; + as_id = mem->slot >> 16; + id = (u16)mem->slot; + /* General sanity checks */ if (mem->memory_size & (PAGE_SIZE - 1)) goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. 
*/ - if ((mem->slot < KVM_USER_MEM_SLOTS) && + if ((id < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, mem->memory_size))) goto out; - if (mem->slot >= KVM_MEM_SLOTS_NUM) + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - slot = id_to_memslot(kvm_memslots(kvm), mem->slot); + slot = id_to_memslot(__kvm_memslots(kvm, as_id), id); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -805,7 +813,7 @@ int __kvm_set_memory_region(struct kvm *kvm, new = old = *slot; - new.id = mem->slot; + new.id = id; new.base_gfn = base_gfn; new.npages = npages; new.flags = mem->flags; @@ -840,9 +848,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ r = -EEXIST; - kvm_for_each_memslot(slot, kvm_memslots(kvm)) { + kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { if ((slot->id >= KVM_USER_MEM_SLOTS) || - (slot->id == mem->slot)) + (slot->id == id)) continue; if (!((base_gfn + npages <= slot->base_gfn) || (base_gfn >= slot->base_gfn + slot->npages))) @@ -871,13 +879,13 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; - memcpy(slots, kvm_memslots(kvm), sizeof(struct kvm_memslots)); + memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - slot = id_to_memslot(slots, mem->slot); + slot = id_to_memslot(slots, id); slot->flags |= KVM_MEMSLOT_INVALID; - old_memslots = install_new_memslots(kvm, slots); + old_memslots = install_new_memslots(kvm, as_id, slots); /* slot was deleted or moved, clear iommu mapping */ kvm_iommu_unmap_pages(kvm, &old); @@ -909,7 +917,7 @@ int __kvm_set_memory_region(struct kvm *kvm, } update_memslots(slots, &new); - old_memslots = install_new_memslots(kvm, slots); + old_memslots = install_new_memslots(kvm, as_id, slots); kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); @@ -956,7 +964,7 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region); static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - if (mem->slot >= KVM_USER_MEM_SLOTS) + if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; return kvm_set_memory_region(kvm, mem); @@ -967,16 +975,18 @@ int kvm_get_dirty_log(struct kvm *kvm, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r, i; + int r, i, as_id, id; unsigned long n; unsigned long any = 0; r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) + as_id = log->slot >> 16; + id = (u16)log->slot; + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) goto out; - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); + slots = __kvm_memslots(kvm, as_id); + memslot = id_to_memslot(slots, id); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -1027,17 +1037,19 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r, i; + int r, i, as_id, id; unsigned long n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) + as_id = log->slot >> 16; + id = (u16)log->slot; + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) goto out; - slots = kvm_memslots(kvm); - memslot = 
id_to_memslot(slots, log->slot); + slots = __kvm_memslots(kvm, as_id); + memslot = id_to_memslot(slots, id); dirty_bitmap = memslot->dirty_bitmap; r = -ENOENT; @@ -2619,6 +2631,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; +#endif +#if KVM_ADDRESS_SPACE_NUM > 1 + case KVM_CAP_MULTI_ADDRESS_SPACE: + return KVM_ADDRESS_SPACE_NUM; #endif default: break; -- cgit v1.2.3 From 502f159c0239863deebfc50e09c0892d0c157101 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 3 Jun 2015 14:52:59 +1000 Subject: powerpc/eeh: Fix trivial error in eeh_restore_dev_state() Commit 28158cd "powerpc/eeh: Enhance pcibios_set_pcie_reset_state()" introduced a fix for a problem where certain configurations could lead to pci_reset_function() destroying the state of PCI devices other than the one specified. Unfortunately, the fix has a trivial bug - it calls pci_save_state() again, when it should be calling pci_restore_state(). This corrects the problem. Cc: Gavin Shan Signed-off-by: David Gibson Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 23e0aa773643..51dcdf66e9e6 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -717,7 +717,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata) /* The caller should restore state for the specified device */ if (pdev != dev) - pci_save_state(pdev); + pci_restore_state(pdev); return NULL; } -- cgit v1.2.3 From c952c1c48233b95f838c8743f09feace1f0e6cab Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:01 +0530 Subject: powerpc: Fix handling of DSCR related facility unavailable exception Currently DSCR (Data Stream Control Register) can be accessed with mfspr or mtspr instructions inside a thread via two different SPR numbers. One being the user accessible problem state SPR number 0x03 and the other being the privilege state SPR number 0x11. All access through the privilege state SPR number get emulated through illegal instruction exception. Any access through the problem state SPR number raises one facility unavailable exception which sets the thread based dscr_inherit bit and enables DSCR facility through FSCR register thus allowing direct access to DSCR without going through this exception in the future. We set the thread.dscr_inherit bit whether the access was with mfspr or mtspr instruction which is neither correct nor does it match the behaviour through the instruction emulation code path driven from privilege state SPR number. User currently observes two different kind of behaviour when accessing the DSCR through these two SPR numbers. This problem can be observed through these two test cases by replacing the privilege state SPR number with the problem state SPR number. (1) http://ozlabs.org/~anton/junkcode/dscr_default_test.c (2) http://ozlabs.org/~anton/junkcode/dscr_explicit_test.c This patch fixes the problem by making sure that the behaviour visible to the user remains the same irrespective of which SPR number is being used. Inside facility unavailable exception, we check whether it was cuased by a mfspr or a mtspr isntrucction. In case of mfspr instruction, just emulate the instruction. In case of mtspr instruction, set the thread based dscr_inherit bit and also enable the facility through FSCR. 
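As a point of reference (this is not part of the patch), a minimal userspace sketch of the access pattern exercised by the two test cases above might look like the following; it assumes a CPU where the DSCR facility exists and uses the problem-state SPR number 0x03 directly:

    #include <stdio.h>

    static inline unsigned long dscr_read_usr(void)
    {
            unsigned long val;

            /* mfspr RT, 0x03 - read DSCR via the problem-state SPR number */
            asm volatile("mfspr %0, 0x03" : "=r" (val));
            return val;
    }

    static inline void dscr_write_usr(unsigned long val)
    {
            /* mtspr 0x03, RS - write DSCR; this is what sets dscr_inherit */
            asm volatile("mtspr 0x03, %0" : : "r" (val));
    }

    int main(void)
    {
            printf("DSCR before: 0x%lx\n", dscr_read_usr());
            dscr_write_usr(1);
            printf("DSCR after:  0x%lx\n", dscr_read_usr());
            return 0;
    }

With the fix applied, the read above is emulated, while the write enables the facility and marks the thread as having an explicit DSCR value.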
All user SPR based mfspr instruction will be emulated till one user SPR based mtspr has been executed. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 19e4744b6eba..6530f1b8874d 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs) }; char *facility = "unknown"; u64 value; + u32 instword, rd; u8 status; bool hv; @@ -1388,12 +1389,46 @@ void facility_unavailable_exception(struct pt_regs *regs) status = value >> 56; if (status == FSCR_DSCR_LG) { - /* User is acessing the DSCR. Set the inherit bit and allow - * the user to set it directly in future by setting via the - * FSCR DSCR bit. We always leave HFSCR DSCR set. + /* + * User is accessing the DSCR register using the problem + * state only SPR number (0x03) either through a mfspr or + * a mtspr instruction. If it is a write attempt through + * a mtspr, then we set the inherit bit. This also allows + * the user to write or read the register directly in the + * future by setting via the FSCR DSCR bit. But in case it + * is a read DSCR attempt through a mfspr instruction, we + * just emulate the instruction instead. This code path will + * always emulate all the mfspr instructions till the user + * has attempted atleast one mtspr instruction. This way it + * preserves the same behaviour when the user is accessing + * the DSCR through privilege level only SPR number (0x11) + * which is emulated through illegal instruction exception. + * We always leave HFSCR DSCR set. */ - current->thread.dscr_inherit = 1; - mtspr(SPRN_FSCR, value | FSCR_DSCR); + if (get_user(instword, (u32 __user *)(regs->nip))) { + pr_err("Failed to fetch the user instruction\n"); + return; + } + + /* Write into DSCR (mtspr 0x03, RS) */ + if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK) + == PPC_INST_MTSPR_DSCR_USER) { + rd = (instword >> 21) & 0x1f; + current->thread.dscr = regs->gpr[rd]; + current->thread.dscr_inherit = 1; + mtspr(SPRN_FSCR, value | FSCR_DSCR); + } + + /* Read from DSCR (mfspr RT, 0x03) */ + if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK) + == PPC_INST_MFSPR_DSCR_USER) { + if (emulate_instruction(regs)) { + pr_err("DSCR based mfspr emulation failed\n"); + return; + } + regs->nip += 4; + emulate_single_step(regs); + } return; } -- cgit v1.2.3 From 280e109992831ee16a0f74ae887c08fb26c021f1 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:02 +0530 Subject: powerpc/kernel: Remove the unused extern dscr_default The process context switch code no longer uses dscr_default variable from the sysfs.c file. The variable became unused when we started storing the CPU specific DSCR value in the PACA structure instead. This patch just removes this extern declaration. It was originally added by the following commit. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index febb50dd5328..8005e18d1b40 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1112,7 +1112,6 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) /* * Copy a thread.. 
*/ -extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ /* * Copy architecture-specific thread state -- cgit v1.2.3 From 1db365258ad9c3624897f48c764f8c557f492b26 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:03 +0530 Subject: powerpc/kernel: Rename PACA_DSCR to PACA_DSCR_DEFAULT PACA_DSCR offset macro tracks dscr_default element in the paca structure. Better change the name of this macro to match that of the data element it tracks. Makes the code more readable. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/tm.S | 4 ++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0034b6b3556a..98230579d99c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -247,7 +247,7 @@ int main(void) #endif DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default)); + DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index afbc20019c2e..278888e89acc 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,7 +556,7 @@ BEGIN_FTR_SECTION ld r0,THREAD_DSCR(r4) cmpwi r6,0 bne 1f - ld r0,PACA_DSCR(r13) + ld r0,PACA_DSCR_DEFAULT(r13) 1: BEGIN_FTR_SECTION_NESTED(70) mfspr r8, SPRN_FSCR diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 5754b226da7e..bf8f34a58670 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -293,7 +293,7 @@ dont_backup_fp: ld r2, STK_GOT(r1) /* Load CPU's default DSCR */ - ld r0, PACA_DSCR(r13) + ld r0, PACA_DSCR_DEFAULT(r13) mtspr SPRN_DSCR, r0 blr @@ -473,7 +473,7 @@ restore_gprs: ld r2, STK_GOT(r1) /* Load CPU's default DSCR */ - ld r0, PACA_DSCR(r13) + ld r0, PACA_DSCR_DEFAULT(r13) mtspr SPRN_DSCR, r0 blr diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 4d70df26c402..faa86e9c0551 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -324,7 +324,7 @@ kvm_start_guest: kvm_secondary_got_guest: /* Set HSTATE_DSCR(r13) to something sensible */ - ld r6, PACA_DSCR(r13) + ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) /* Order load of vcore, ptid etc. after load of vcpu */ -- cgit v1.2.3 From d3cb06e0cdc9841b289a0d68acaffd2868504902 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 21 May 2015 12:13:04 +0530 Subject: powerpc/dscr: Add some in-code documentation This patch adds some in-code documentation to the DSCR related code to make it more readable without having any functional change to it. 
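For readers unfamiliar with the interface being documented: the system-wide default described below is exported through sysfs, and a userspace program can inspect it with something like the hypothetical snippet here (the sysfs path is the conventional one for this attribute and is an assumption, not something taken from the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned long dscr;
            /* assumed path of the system-wide DSCR default attribute */
            FILE *f = fopen("/sys/devices/system/cpu/dscr_default", "r");

            if (!f)
                    return 1;
            if (fscanf(f, "%lx", &dscr) == 1)
                    printf("system-wide DSCR default: 0x%lx\n", dscr);
            fclose(f);
            return 0;
    }

Writing the same file (as root) updates every per-CPU default stored in the PACA, which is exactly the behaviour the new comments spell out.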
Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 9 +++++++++ arch/powerpc/kernel/sysfs.c | 38 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bf117d8fb45f..28ded5d9b579 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -295,6 +295,15 @@ struct thread_struct { #endif #ifdef CONFIG_PPC64 unsigned long dscr; + /* + * This member element dscr_inherit indicates that the process + * has explicitly attempted and changed the DSCR register value + * for itself. Hence kernel wont use the default CPU DSCR value + * contained in the PACA structure anymore during process context + * switch. Once this variable is set, this behaviour will also be + * inherited to all the children of this process from that point + * onwards. + */ int dscr_inherit; unsigned long ppr; /* used to save/restore SMT priority */ #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index fa1fd8a0c867..692873bff334 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -496,13 +496,34 @@ static DEVICE_ATTR(spurr, 0400, show_spurr, NULL); static DEVICE_ATTR(purr, 0400, show_purr, store_purr); static DEVICE_ATTR(pir, 0400, show_pir, NULL); +/* + * This is the system wide DSCR register default value. Any + * change to this default value through the sysfs interface + * will update all per cpu DSCR default values across the + * system stored in their respective PACA structures. + */ static unsigned long dscr_default; +/** + * read_dscr() - Fetch the cpu specific DSCR default + * @val: Returned cpu specific DSCR default value + * + * This function returns the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ static void read_dscr(void *val) { *(unsigned long *)val = get_paca()->dscr_default; } + +/** + * write_dscr() - Update the cpu specific DSCR default + * @val: New cpu specific DSCR default value to update + * + * This function updates the per cpu DSCR default value + * for any cpu which is contained in it's PACA structure. + */ static void write_dscr(void *val) { get_paca()->dscr_default = *(unsigned long *)val; @@ -520,12 +541,29 @@ static void add_write_permission_dev_attr(struct device_attribute *attr) attr->attr.mode |= 0200; } +/** + * show_dscr_default() - Fetch the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * + * This function returns the system wide DSCR default value. + */ static ssize_t show_dscr_default(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%lx\n", dscr_default); } +/** + * store_dscr_default() - Update the system wide DSCR default + * @dev: Device structure + * @attr: Device attribute structure + * @buf: Interface buffer + * @count: Size of the update + * + * This function updates the system wide DSCR default value. + */ static ssize_t __used store_dscr_default(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -- cgit v1.2.3 From cd11433eda42815ab53c8be44580a0009000b761 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 4 Jun 2015 16:37:34 -0500 Subject: PCI: Include , not We already include from , so just include directly. 
Signed-off-by: Bjorn Helgaas CC: linuxppc-dev@lists.ozlabs.org CC: linux-s390@vger.kernel.org --- arch/powerpc/platforms/52xx/mpc52xx_pci.c | 2 +- arch/s390/kernel/suspend.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c index e2d401ad8fbb..6eb3b2abae90 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -12,7 +12,7 @@ #undef DEBUG -#include +#include #include #include #include diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index d3236c9e226b..39e2f41b6cf0 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -9,10 +9,10 @@ #include #include #include +#include #include #include #include -#include #include #include "entry.h" -- cgit v1.2.3 From 633adc711de0bcb6d6e1c071302880e0c8c05d57 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 4 Jun 2015 16:37:56 -0500 Subject: PCI: Remove unnecessary #includes of In include/linux/pci.h, we already #include , so we don't need to include directly. Remove the unnecessary includes. All the files here already include . Signed-off-by: Bjorn Helgaas Acked-by: Simon Horman # sh Acked-by: Ralf Baechle --- arch/alpha/kernel/core_irongate.c | 1 - arch/alpha/kernel/sys_eiger.c | 1 - arch/alpha/kernel/sys_nautilus.c | 1 - arch/mips/pci/fixup-cobalt.c | 1 - arch/mips/pci/ops-mace.c | 1 - arch/mips/pci/pci-lantiq.c | 1 - arch/powerpc/kernel/prom.c | 1 - arch/powerpc/kernel/prom_init.c | 1 - arch/sh/drivers/pci/ops-sh5.c | 1 - arch/sh/drivers/pci/pci-sh5.c | 1 - arch/x86/kernel/x86_init.c | 1 - 11 files changed, 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 00096df0f6ad..83d0a359a1b2 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 79d69d7f63f8..15f42083bdb3 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 700686d04869..2cfaa0e5c577 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c index a138e8ee5cfc..b3ab59318d91 100644 --- a/arch/mips/pci/fixup-cobalt.c +++ b/arch/mips/pci/fixup-cobalt.c @@ -13,7 +13,6 @@ #include #include -#include #include #include diff --git a/arch/mips/pci/ops-mace.c b/arch/mips/pci/ops-mace.c index 6b5821febc38..951d8070fb48 100644 --- a/arch/mips/pci/ops-mace.c +++ b/arch/mips/pci/ops-mace.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #if 0 diff --git a/arch/mips/pci/pci-lantiq.c b/arch/mips/pci/pci-lantiq.c index 8b117e638306..c5347d99cf3a 100644 --- a/arch/mips/pci/pci-lantiq.c +++ b/arch/mips/pci/pci-lantiq.c @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 308c5e15676b..00fdea24f854 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git 
a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index fd1fe4c37599..fcca8077e6a2 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c index 4ce95a001b80..45361946460f 100644 --- a/arch/sh/drivers/pci/ops-sh5.c +++ b/arch/sh/drivers/pci/ops-sh5.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include "pci-sh5.h" diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 16c1e721bf54..8229114c6a58 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "pci-sh5.h" diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 234b0722de53..eed5625b10e8 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 01d72a95188880b22190e937ed8718ed4b45bdce Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 4 Jun 2015 16:38:17 -0500 Subject: PCI: Remove unused pci_dma_burst_advice() pci_dma_burst_advice() was added by e24c2d963a60 ("[PATCH] PCI: DMA bursting advice") but apparently never used. Remove it. Signed-off-by: Bjorn Helgaas Acked-by: Michal Simek # microblaze CC: David S. Miller --- arch/alpha/include/asm/pci.h | 16 ---------------- arch/arm/include/asm/pci.h | 10 ---------- arch/frv/include/asm/pci.h | 10 ---------- arch/ia64/include/asm/pci.h | 19 ------------------- arch/microblaze/include/asm/pci.h | 10 ---------- arch/mips/include/asm/pci.h | 10 ---------- arch/parisc/include/asm/pci.h | 19 ------------------- arch/powerpc/include/asm/pci.h | 30 ------------------------------ arch/sh/include/asm/pci.h | 18 ------------------ arch/sparc/include/asm/pci_32.h | 10 ---------- arch/sparc/include/asm/pci_64.h | 19 ------------------- arch/unicore32/include/asm/pci.h | 10 ---------- arch/x86/include/asm/pci.h | 7 ------- drivers/net/ethernet/sun/cassini.c | 1 - include/linux/pci.h | 11 ----------- 15 files changed, 200 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index f7f680f7457d..8b02afeb6319 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -71,22 +71,6 @@ extern void pcibios_set_master(struct pci_dev *dev); /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - unsigned long cacheline_size; - u8 byte; - - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); - if (byte == 0) - cacheline_size = 1024; - else - cacheline_size = (int) byte * 4; - - *strat = PCI_DMA_BURST_BOUNDARY; - *strategy_parameter = cacheline_size; -} #endif /* TODO: integrate with include/asm-generic/pci.h ? 
*/ diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index 585dc33a7a24..a5635444ca41 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -31,16 +31,6 @@ static inline int pci_proc_domain(struct pci_bus *bus) */ #define PCI_DMA_BUS_IS_PHYS (1) -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - #define HAVE_PCI_MMAP extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index 2035a4d3f9b9..a6d4ed042c70 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -41,16 +41,6 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, /* Return the index of the PCI controller for device PDEV. */ #define pci_controller_num(PDEV) (0) -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - /* * These are pretty much arbitrary with the CoMEM implementation. * We have the whole address space to ourselves. diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 32ea19ac5713..b897fae1f0ca 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -52,25 +52,6 @@ extern unsigned long ia64_max_iommu_merge_mask; #include -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - unsigned long cacheline_size; - u8 byte; - - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); - if (byte == 0) - cacheline_size = 1024; - else - cacheline_size = (int) byte * 4; - - *strat = PCI_DMA_BURST_MULTIPLE; - *strategy_parameter = cacheline_size; -} -#endif - #define HAVE_PCI_MMAP extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 95b03886e0a8..fdf2e75d7033 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -44,16 +44,6 @@ struct pci_dev; */ #define pcibios_assign_all_busses() 0 -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - extern int pci_domain_nr(struct pci_bus *bus); /* Decide whether to display the domain number in /proc */ diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index d9692993fc83..70dcc5498128 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -113,16 +113,6 @@ struct pci_dev; */ extern unsigned int PCI_DMA_BUS_IS_PHYS; -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - #ifdef CONFIG_PCI_DOMAINS #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 
20df2b04fc09..bf5e044281d6 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -196,25 +196,6 @@ static inline void pcibios_register_hba(struct pci_hba_data *x) /* export the pci_ DMA API in terms of the dma_ one */ #include -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - unsigned long cacheline_size; - u8 byte; - - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); - if (byte == 0) - cacheline_size = 1024; - else - cacheline_size = (int) byte * 4; - - *strat = PCI_DMA_BURST_MULTIPLE; - *strategy_parameter = cacheline_size; -} -#endif - static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { return channel ? 15 : 14; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 4aef8d660999..99dc432b256a 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -71,36 +71,6 @@ extern struct dma_map_ops *get_pci_dma_ops(void); */ #define PCI_DISABLE_MWI -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - unsigned long cacheline_size; - u8 byte; - - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); - if (byte == 0) - cacheline_size = 1024; - else - cacheline_size = (int) byte * 4; - - *strat = PCI_DMA_BURST_MULTIPLE; - *strategy_parameter = cacheline_size; -} -#endif - -#else /* 32-bit */ - -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif #endif /* CONFIG_PPC64 */ extern int pci_domain_nr(struct pci_bus *bus); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 5b4511552998..e343dbd02e41 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -86,24 +86,6 @@ extern void pcibios_set_master(struct pci_dev *dev); * direct memory write. */ #define PCI_DISABLE_MWI - -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - unsigned long cacheline_size; - u8 byte; - - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); - - if (byte == 0) - cacheline_size = L1_CACHE_BYTES; - else - cacheline_size = byte << 2; - - *strat = PCI_DMA_BURST_MULTIPLE; - *strategy_parameter = cacheline_size; -} #endif /* Board-specific fixup routines. 
*/ diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h index 53e9b4987db0..b7c092df3134 100644 --- a/arch/sparc/include/asm/pci_32.h +++ b/arch/sparc/include/asm/pci_32.h @@ -22,16 +22,6 @@ struct pci_dev; -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - #endif /* __KERNEL__ */ #ifndef CONFIG_LEON_PCI diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index bd00a6226169..022d16008a00 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -31,25 +31,6 @@ #define PCI64_REQUIRED_MASK (~(u64)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - unsigned long cacheline_size; - u8 byte; - - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); - if (byte == 0) - cacheline_size = 1024; - else - cacheline_size = (int) byte * 4; - - *strat = PCI_DMA_BURST_BOUNDARY; - *strategy_parameter = cacheline_size; -} -#endif - /* Return the index of the PCI controller for device PDEV. */ int pci_domain_nr(struct pci_bus *bus); diff --git a/arch/unicore32/include/asm/pci.h b/arch/unicore32/include/asm/pci.h index 654407e98619..38b3f3785c3c 100644 --- a/arch/unicore32/include/asm/pci.h +++ b/arch/unicore32/include/asm/pci.h @@ -18,16 +18,6 @@ #include #include /* for PCIBIOS_MIN_* */ -#ifdef CONFIG_PCI -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} -#endif - #define HAVE_PCI_MMAP extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 4e370a5d8117..e51c229a29a4 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -80,13 +80,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, #ifdef CONFIG_PCI extern void early_quirks(void); -static inline void pci_dma_burst_advice(struct pci_dev *pdev, - enum pci_dma_burst_strategy *strat, - unsigned long *strategy_parameter) -{ - *strat = PCI_DMA_BURST_INFINITY; - *strategy_parameter = ~0UL; -} #else static inline void early_quirks(void) { } #endif diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 3dc1f68b322d..6ce973187225 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3058,7 +3058,6 @@ static void cas_init_mac(struct cas *cp) /* setup core arbitration weight register */ writel(CAWR_RR_DIS, cp->regs + REG_CAWR); - /* XXX Use pci_dma_burst_advice() */ #if !defined(CONFIG_SPARC64) && !defined(CONFIG_ALPHA) /* set the infinite burst register for chips that don't have * pci issues. 
diff --git a/include/linux/pci.h b/include/linux/pci.h index 353db8dc4c6e..08fb4e307d68 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1197,15 +1197,6 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, #define pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle) #define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr) -enum pci_dma_burst_strategy { - PCI_DMA_BURST_INFINITY, /* make bursts as large as possible, - strategy_parameter is N/A */ - PCI_DMA_BURST_BOUNDARY, /* disconnect at every strategy_parameter - byte boundaries */ - PCI_DMA_BURST_MULTIPLE, /* disconnect at some multiple of - strategy_parameter byte boundaries */ -}; - struct msix_entry { u32 vector; /* kernel uses to write allocated vector */ u16 entry; /* driver uses to specify entry, OS writes */ @@ -1430,8 +1421,6 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline void pci_release_regions(struct pci_dev *dev) { } -#define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0) - static inline void pci_block_cfg_access(struct pci_dev *dev) { } static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev) { return 0; } -- cgit v1.2.3 From cfcb3d80a28380ba027331eb548ba309c4b66559 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 14 Apr 2015 13:05:57 +0530 Subject: powerpc/mm: Add trace point for tracking hash pte fault This enables us to understand how many hash fault we are taking when running benchmarks. For ex: -bash-4.2# ./perf stat -e powerpc:hash_fault -e page-faults /tmp/ebizzy.ppc64 -S 30 -P -n 1000 ... Performance counter stats for '/tmp/ebizzy.ppc64 -S 30 -P -n 1000': 1,10,04,075 powerpc:hash_fault 1,10,03,429 page-faults 30.865978991 seconds time elapsed NOTE: The impact of the tracepoint was not noticeable when running test. It was within the run-time variance of the test. 
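(Aside, not part of the patch: besides perf, the event can be consumed straight from tracefs. A minimal sketch, assuming the usual mount point at /sys/kernel/debug/tracing and the powerpc event group name:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char line[256];
            /* enable the assumed events/powerpc/hash_fault tracepoint */
            FILE *f = fopen("/sys/kernel/debug/tracing/events/powerpc/hash_fault/enable", "w");

            if (!f)
                    return 1;
            fputs("1\n", f);
            fclose(f);

            /* stream the trace and keep only hash_fault records */
            f = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (strstr(line, "hash_fault"))
                            fputs(line, stdout);
            fclose(f);
            return 0;
    }

The run-time comparison below is what backs up the NOTE above.)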
For ex: without-patch: -------------- Performance counter stats for './a.out 3000 300': 643 page-faults # 0.089 M/sec 7.236562 task-clock (msec) # 0.928 CPUs utilized 2,179,213 stalled-cycles-frontend # 0.00% frontend cycles idle 17,174,367 stalled-cycles-backend # 0.00% backend cycles idle 0 context-switches # 0.000 K/sec 0.007794658 seconds time elapsed And with-patch: --------------- Performance counter stats for './a.out 3000 300': 643 page-faults # 0.089 M/sec 7.233746 task-clock (msec) # 0.921 CPUs utilized 0 context-switches # 0.000 K/sec 0.007854876 seconds time elapsed Performance counter stats for './a.out 3000 300': 643 page-faults # 0.087 M/sec 649 powerpc:hash_fault # 0.087 M/sec 7.430376 task-clock (msec) # 0.938 CPUs utilized 2,347,174 stalled-cycles-frontend # 0.00% frontend cycles idle 17,524,282 stalled-cycles-backend # 0.00% backend cycles idle 0 context-switches # 0.000 K/sec 0.007920284 seconds time elapsed Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/trace.h | 20 ++++++++++++++++++++ arch/powerpc/mm/hash_utils_64.c | 2 ++ 2 files changed, 22 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index c15da6073cb8..8e86b48d0369 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -144,6 +144,26 @@ TRACE_EVENT_FN(opal_exit, ); #endif +TRACE_EVENT(hash_fault, + + TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap), + TP_ARGS(addr, access, trap), + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, access) + __field(unsigned long, trap) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->access = access; + __entry->trap = trap; + ), + + TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx", + __entry->addr, __entry->access, __entry->trap) +); + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a04ca922f0db..5ec987f65b2c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -57,6 +57,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -1004,6 +1005,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); + trace_hash_fault(ea, access, trap); /* Get region & vsid */ switch (REGION_ID(ea)) { -- cgit v1.2.3 From ea30e99e8eccb684490f40d011ea534ecd937e98 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:53 +1000 Subject: powerpc/eeh/ioda2: Use device::iommu_group to check IOMMU group This relies on the fact that a PCI device always has an IOMMU table which may not be the case when we get dynamic DMA windows so let's use more reliable check for IOMMU group here. As we do not rely on the table presence here, remove the workaround from pnv_pci_ioda2_set_bypass(); also remove the @add_to_iommu_group parameter from pnv_ioda_setup_bus_dma(). 
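To illustrate the shape of the new check (a sketch only, not code lifted from the patch): instead of fetching the device's IOMMU table and testing it, the walk callback can simply test whether the struct device has been put into an IOMMU group, e.g.:

    #include <linux/pci.h>

    /* Callback for a device walk: remember the first device in an IOMMU group. */
    static int first_dev_in_group(struct device *dev, void *data)
    {
            struct pci_dev **ppdev = data;

            if (dev && dev->iommu_group) {
                    *ppdev = to_pci_dev(dev);
                    return 1;       /* non-zero stops the iteration */
            }
            return 0;
    }

The callback name and the exact walk helper that drives it are illustrative; the point is that group membership, unlike table presence, stays meaningful once DMA windows become dynamic.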
Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 4 +--- arch/powerpc/platforms/powernv/pci-ioda.c | 27 +++++---------------------- 2 files changed, 6 insertions(+), 25 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 51dcdf66e9e6..af9b597b10af 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1410,13 +1410,11 @@ static int dev_has_iommu_table(struct device *dev, void *data) { struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev **ppdev = data; - struct iommu_table *tbl; if (!dev) return 0; - tbl = get_iommu_table_base(dev); - if (tbl && tbl->it_group) { + if (dev->iommu_group) { *ppdev = pdev; return 1; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b2841853550a..5491d2f0e192 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1655,21 +1655,15 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pnv_phb *phb, } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus, - bool add_to_iommu_group) + struct pci_bus *bus) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - if (add_to_iommu_group) - set_iommu_table_base_and_group(&dev->dev, - pe->tce32_table); - else - set_iommu_table_base(&dev->dev, pe->tce32_table); + set_iommu_table_base_and_group(&dev->dev, pe->tce32_table); if (dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate, - add_to_iommu_group); + pnv_ioda_setup_bus_dma(pe, dev->subordinate); } } @@ -1846,7 +1840,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + pnv_ioda_setup_bus_dma(pe, pe->pbus); } else if (pe->flags & PNV_IODA_PE_VF) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); @@ -1883,17 +1877,6 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) window_id, pe->tce_bypass_base, 0); - - /* - * EEH needs the mapping between IOMMU table and group - * of those VFIO/KVM pass-through devices. We can postpone - * resetting DMA ops until the DMA mask is configured in - * host side. - */ - if (pe->pdev) - set_iommu_table_base(&pe->pdev->dev, tbl); - else - pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } if (rc) pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); @@ -1985,7 +1968,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - pnv_ioda_setup_bus_dma(pe, pe->pbus, true); + pnv_ioda_setup_bus_dma(pe, pe->pbus); } else if (pe->flags & PNV_IODA_PE_VF) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); -- cgit v1.2.3 From 4617082ec049d92a797e8be0b4ba2ba6b59d90d1 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:54 +1000 Subject: powerpc/iommu/powernv: Get rid of set_iommu_table_base_and_group The set_iommu_table_base_and_group() name suggests that the function sets table base and add a device to an IOMMU group. The actual purpose for table base setting is to put some reference into a device so later iommu_add_device() can get the IOMMU group reference and the device to the group. 
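Spelled out as a sketch (purely illustrative, the real call sites are in the diff below), the open-coded replacement is simply the pair:

    set_iommu_table_base(&dev->dev, tbl);   /* stash the table, and hence its group, on the device */
    iommu_add_device(&dev->dev);            /* pick the group up from the device and attach it */
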
At the moment a group cannot be explicitly passed to iommu_add_device() as we want it to work from the bus notifier, we can fix it later and remove confusing calls of set_iommu_table_base(). This replaces set_iommu_table_base_and_group() with a couple of set_iommu_table_base() + iommu_add_device() which makes reading the code easier. This adds few comments why set_iommu_table_base() and iommu_add_device() are called where they are called. For IODA1/2, this essentially removes iommu_add_device() call from the pnv_pci_ioda_dma_dev_setup() as it will always fail at this particular place: - for physical PE, the device is already attached by iommu_add_device() in pnv_pci_ioda_setup_dma_pe(); - for virtual PE, the sysfs entries are not ready to create all symlinks so actual adding is happening in tce_iommu_bus_notifier. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 7 ------- arch/powerpc/platforms/powernv/pci-ioda.c | 27 +++++++++++++++++++++++---- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 ++- arch/powerpc/platforms/pseries/iommu.c | 15 ++++++++------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 1e27d6338565..8353c865db6f 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -140,13 +140,6 @@ static inline int __init tce_iommu_bus_notifier_init(void) } #endif /* !CONFIG_IOMMU_API */ -static inline void set_iommu_table_base_and_group(struct device *dev, - void *base) -{ - set_iommu_table_base(dev, base); - iommu_add_device(dev); -} - extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, unsigned long mask, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5491d2f0e192..fd594b28fce1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1598,7 +1598,13 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base_and_group(&pdev->dev, pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->tce32_table); + /* + * Note: iommu_add_device() will fail here as + * for physical PE: the device is already added by now; + * for virtual PE: sysfs entries are not ready yet and + * tce_iommu_bus_notifier will add the device to a group later. + */ } static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -1660,7 +1666,8 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base_and_group(&dev->dev, pe->tce32_table); + set_iommu_table_base(&dev->dev, pe->tce32_table); + iommu_add_device(&dev->dev); if (dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); @@ -1836,7 +1843,13 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - set_iommu_table_base_and_group(&pe->pdev->dev, tbl); + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. 
+ */ + set_iommu_table_base(&pe->pdev->dev, tbl); + iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); @@ -1964,7 +1977,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); - set_iommu_table_base_and_group(&pe->pdev->dev, tbl); + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. + * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + set_iommu_table_base(&pe->pdev->dev, tbl); + iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 7f826e8cc1d4..61ae20aa9000 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -92,7 +92,8 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, pci_domain_nr(phb->hose->bus), phb->opal_id); } - set_iommu_table_base_and_group(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); + iommu_add_device(&pdev->dev); } static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 61d5a17f45c0..05ab06dccb45 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -688,8 +688,8 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); - set_iommu_table_base_and_group(&dev->dev, - PCI_DN(dn)->iommu_table); + set_iommu_table_base(&dev->dev, tbl); + iommu_add_device(&dev->dev); return; } @@ -700,10 +700,10 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) dn = dn->parent; - if (dn && PCI_DN(dn)) - set_iommu_table_base_and_group(&dev->dev, - PCI_DN(dn)->iommu_table); - else + if (dn && PCI_DN(dn)) { + set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + iommu_add_device(&dev->dev); + } else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", pci_name(dev)); } @@ -1115,7 +1115,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } - set_iommu_table_base_and_group(&dev->dev, pci->iommu_table); + set_iommu_table_base(&dev->dev, pci->iommu_table); + iommu_add_device(&dev->dev); } static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) -- cgit v1.2.3 From c5773822c000bafd462ad02db1d61459acef0d52 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:55 +1000 Subject: powerpc/powernv/ioda: Clean up IOMMU group registration The existing code has 3 calls to iommu_register_group() and all 3 branches actually cover all possible cases. This replaces 3 calls with one and moves the registration earlier; the latter will make more sense when we add TCE table sharing. 
Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fd594b28fce1..2b829eb07f87 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1785,6 +1785,9 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + tbl = pe->tce32_table; + iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + /* Grab a 32-bit TCE table */ pe->tce32_seg = base; pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", @@ -1819,7 +1822,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs, base << 28, IOMMU_PAGE_SHIFT_4K); @@ -1841,8 +1843,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); /* * Setting table base here only for carrying iommu_group * further down to let iommu_add_device() do the job. @@ -1850,14 +1850,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, */ set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) pnv_ioda_setup_bus_dma(pe, pe->pbus); - } else if (pe->flags & PNV_IODA_PE_VF) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - } return; fail: @@ -1924,6 +1918,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + tbl = pe->tce32_table; + iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; end = (1 << ilog2(phb->ioda.m32_pci_base)); @@ -1955,7 +1952,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } /* Setup linux iommu table */ - tbl = pe->tce32_table; pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, IOMMU_PAGE_SHIFT_4K); @@ -1975,8 +1971,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); /* * Setting table base here only for carrying iommu_group * further down to let iommu_add_device() do the job. 
@@ -1984,14 +1978,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, */ set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) pnv_ioda_setup_bus_dma(pe, pe->pbus); - } else if (pe->flags & PNV_IODA_PE_VF) { - iommu_register_group(tbl, phb->hose->global_number, - pe->pe_number); - } /* Also create a bypass window */ if (!pnv_iommu_bypass_disabled) -- cgit v1.2.3 From ac9a58891a965216e9cb549a0a2012b97e3abcce Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:56 +1000 Subject: powerpc/iommu: Put IOMMU group explicitly So far an iommu_table lifetime was the same as PE. Dynamic DMA windows will change this and iommu_free_table() will not always require the group to be released. This moves iommu_group_put() out of iommu_free_table(). This adds a iommu_pseries_free_table() helper which does iommu_group_put() and iommu_free_table(). Later it will be changed to receive a table_group and we will have to change less lines then. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/iommu.c | 7 ------- arch/powerpc/platforms/powernv/pci-ioda.c | 5 +++++ arch/powerpc/platforms/pseries/iommu.c | 16 +++++++++++++++- 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index b054f33ab1fb..3d47eb3e15dd 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -726,13 +726,6 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); -#ifdef CONFIG_IOMMU_API - if (tbl->it_group) { - iommu_group_put(tbl->it_group); - BUG_ON(tbl->it_group); - } -#endif - /* verify that table contains no entries */ if (!bitmap_empty(tbl->it_map, tbl->it_size)) pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2b829eb07f87..8070dc9921f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1310,6 +1311,10 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); pe->tce32_table = NULL; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 05ab06dccb45..fe5117bac29a 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,18 @@ #include "pseries.h" +static void iommu_pseries_free_table(struct iommu_table *tbl, + const char *node_name) +{ +#ifdef CONFIG_IOMMU_API + if (tbl->it_group) { + iommu_group_put(tbl->it_group); + BUG_ON(tbl->it_group); + } +#endif + iommu_free_table(tbl, node_name); +} + static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, __be64 
*startp, __be64 *endp) { @@ -1271,7 +1284,8 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti */ remove_ddw(np, false); if (pci && pci->iommu_table) - iommu_free_table(pci->iommu_table, np->full_name); + iommu_pseries_free_table(pci->iommu_table, + np->full_name); spin_lock(&direct_window_list_lock); list_for_each_entry(window, &direct_window_list, list) { -- cgit v1.2.3 From 8aca92d82d9f259e0b6b0c81e5eb9baaf471e6ad Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:57 +1000 Subject: powerpc/iommu: Always release iommu_table in iommu_free_table() At the moment iommu_free_table() only releases memory if the table was initialized for the platform code use, i.e. it had it_map initialized (which purpose is to track DMA memory space use). With dynamic DMA windows, we will need to be able to release iommu_table even if it was used for VFIO in which case it_map is NULL so does the patch. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 3d47eb3e15dd..73eb39a7a9e1 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -713,9 +713,11 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name) unsigned long bitmap_sz; unsigned int order; - if (!tbl || !tbl->it_map) { - printk(KERN_ERR "%s: expected TCE map for %s\n", __func__, - node_name); + if (!tbl) + return; + + if (!tbl->it_map) { + kfree(tbl); return; } -- cgit v1.2.3 From 9b14a1ff8657d3ee844f8987482bc367a716848c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:34:58 +1000 Subject: vfio: powerpc/spapr: Move page pinning from arch code to VFIO IOMMU driver This moves page pinning (get_user_pages_fast()/put_page()) code out of the platform IOMMU code and puts it to VFIO IOMMU driver where it belongs to as the platform code does not deal with page pinning. This makes iommu_take_ownership()/iommu_release_ownership() deal with the IOMMU table bitmap only. This removes page unpinning from iommu_take_ownership() as the actual TCE table might contain garbage and doing put_page() on it is undefined behaviour. Besides the last part, the rest of the patch is mechanical. 
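In outline, the map path that moves into the VFIO driver below pins each user page, translates it to a kernel virtual address, and only then programs the hardware table, undoing the pin on failure. The following is a simplified per-page sketch of that pattern (the real tce_iommu_build() also preserves the sub-page offset via IOMMU_PAGE_MASK and unwinds already-built entries with tce_iommu_clear() on error); it is an illustration, not code from the patch:

static long map_one_tce(struct iommu_table *tbl, unsigned long entry,
			unsigned long tce)
{
	struct page *page = NULL;
	unsigned long hva;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	/* Pin the user page; this step used to live in arch/powerpc code. */
	if (get_user_pages_fast(tce & PAGE_MASK, 1,
				direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	/* The TCE wants a kernel virtual address of the pinned page. */
	hva = (unsigned long) page_address(page);

	/* Program the hardware table; drop the pin if that fails. */
	if (iommu_tce_build(tbl, entry, hva, direction)) {
		put_page(page);
		return -EFAULT;
	}

	return 0;
}
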
Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 4 -- arch/powerpc/kernel/iommu.c | 55 ------------------------- drivers/vfio/vfio_iommu_spapr_tce.c | 80 +++++++++++++++++++++++++++++++------ 3 files changed, 67 insertions(+), 72 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 8353c865db6f..e94a5e3c5a2f 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -194,10 +194,6 @@ extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, unsigned long hwaddr, enum dma_data_direction direction); extern unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry); -extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, - unsigned long entry, unsigned long pages); -extern int iommu_put_tce_user_mode(struct iommu_table *tbl, - unsigned long entry, unsigned long tce); extern void iommu_flush_tce(struct iommu_table *tbl); extern int iommu_take_ownership(struct iommu_table *tbl); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 73eb39a7a9e1..0019c80aa81d 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -986,30 +986,6 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) } EXPORT_SYMBOL_GPL(iommu_clear_tce); -int iommu_clear_tces_and_put_pages(struct iommu_table *tbl, - unsigned long entry, unsigned long pages) -{ - unsigned long oldtce; - struct page *page; - - for ( ; pages; --pages, ++entry) { - oldtce = iommu_clear_tce(tbl, entry); - if (!oldtce) - continue; - - page = pfn_to_page(oldtce >> PAGE_SHIFT); - WARN_ON(!page); - if (page) { - if (oldtce & TCE_PCI_WRITE) - SetPageDirty(page); - put_page(page); - } - } - - return 0; -} -EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages); - /* * hwaddr is a kernel virtual address here (0xc... bazillion), * tce_build converts it to a physical address. 
@@ -1039,35 +1015,6 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_build); -int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry, - unsigned long tce) -{ - int ret; - struct page *page = NULL; - unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; - enum dma_data_direction direction = iommu_tce_direction(tce); - - ret = get_user_pages_fast(tce & PAGE_MASK, 1, - direction != DMA_TO_DEVICE, &page); - if (unlikely(ret != 1)) { - /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", - tce, entry << tbl->it_page_shift, ret); */ - return -EFAULT; - } - hwaddr = (unsigned long) page_address(page) + offset; - - ret = iommu_tce_build(tbl, entry, hwaddr, direction); - if (ret) - put_page(page); - - if (ret < 0) - pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", - __func__, entry << tbl->it_page_shift, tce, ret); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode); - int iommu_take_ownership(struct iommu_table *tbl) { unsigned long sz = (tbl->it_size + 7) >> 3; @@ -1081,7 +1028,6 @@ int iommu_take_ownership(struct iommu_table *tbl) } memset(tbl->it_map, 0xff, sz); - iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); /* * Disable iommu bypass, otherwise the user can DMA to all of @@ -1099,7 +1045,6 @@ void iommu_release_ownership(struct iommu_table *tbl) { unsigned long sz = (tbl->it_size + 7) >> 3; - iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); memset(tbl->it_map, 0, sz); /* Restore bit#0 set by iommu_init_table() */ diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 730b4ef3e0cc..b95fa2b64680 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -147,6 +147,67 @@ static void tce_iommu_release(void *iommu_data) kfree(container); } +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + unsigned long oldtce; + struct page *page; + + for ( ; pages; --pages, ++entry) { + oldtce = iommu_clear_tce(tbl, entry); + if (!oldtce) + continue; + + page = pfn_to_page(oldtce >> PAGE_SHIFT); + WARN_ON(!page); + if (page) { + if (oldtce & TCE_PCI_WRITE) + SetPageDirty(page); + put_page(page); + } + } + + return 0; +} + +static long tce_iommu_build(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long tce, unsigned long pages) +{ + long i, ret = 0; + struct page *page = NULL; + unsigned long hva; + enum dma_data_direction direction = iommu_tce_direction(tce); + + for (i = 0; i < pages; ++i) { + unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + + ret = get_user_pages_fast(tce & PAGE_MASK, 1, + direction != DMA_TO_DEVICE, &page); + if (unlikely(ret != 1)) { + ret = -EFAULT; + break; + } + hva = (unsigned long) page_address(page) + offset; + + ret = iommu_tce_build(tbl, entry + i, hva, direction); + if (ret) { + put_page(page); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + tce += IOMMU_PAGE_SIZE_4K; + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -195,7 +256,7 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; struct iommu_table *tbl = container->tbl; - unsigned 
long tce, i; + unsigned long tce; if (!tbl) return -ENXIO; @@ -229,17 +290,9 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) { - ret = iommu_put_tce_user_mode(tbl, - (param.iova >> IOMMU_PAGE_SHIFT_4K) + i, - tce); - if (ret) - break; - tce += IOMMU_PAGE_SIZE_4K; - } - if (ret) - iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT_4K, i); + ret = tce_iommu_build(container, tbl, + param.iova >> IOMMU_PAGE_SHIFT_4K, + tce, param.size >> IOMMU_PAGE_SHIFT_4K); iommu_flush_tce(tbl); @@ -273,7 +326,7 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - ret = iommu_clear_tces_and_put_pages(tbl, + ret = tce_iommu_clear(container, tbl, param.iova >> IOMMU_PAGE_SHIFT_4K, param.size >> IOMMU_PAGE_SHIFT_4K); iommu_flush_tce(tbl); @@ -357,6 +410,7 @@ static void tce_iommu_detach_group(void *iommu_data, /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ container->tbl = NULL; + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); iommu_release_ownership(tbl); } mutex_unlock(&container->lock); -- cgit v1.2.3 From 10b35b2b7485c342334a48cf199063eed8b8748e Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:05 +1000 Subject: powerpc/powernv: Do not set "read" flag if direction==DMA_NONE Normally a bitmap from the iommu_table is used to track what TCE entry is in use. Since we are going to use iommu_table without its locks and do xchg() instead, it becomes essential not to put bits which are not implied in the direction flag as the old TCE value (more precisely - the permission bits) will be used to decide whether to put the page or not. This adds iommu_direction_to_tce_perm() (its counterpart is there already) and uses it for powernv's pnv_tce_build(). 
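The dependency described above can be made concrete: once the table is updated lock-free with xchg(), the only record of whether a real page sits behind an entry is the permission bits left in the old TCE value, so pnv_tce_build() must not plant TCE_PCI_READ for a DMA_NONE mapping. A minimal sketch of the consumer side, mirroring the tce_iommu_clear() logic added earlier in this series (illustrative only, not code from this patch):

static void clear_tce_and_unpin(struct iommu_table *tbl, unsigned long entry)
{
	unsigned long oldtce = iommu_clear_tce(tbl, entry);

	/* No READ/WRITE bits means nothing was ever pinned behind this
	 * entry, which is exactly what a DMA_NONE mapping must look like. */
	if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE)))
		return;

	if (oldtce & TCE_PCI_WRITE)
		SetPageDirty(pfn_to_page(oldtce >> PAGE_SHIFT));
	put_page(pfn_to_page(oldtce >> PAGE_SHIFT));
}
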
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 1 + arch/powerpc/kernel/iommu.c | 15 +++++++++++++++ arch/powerpc/platforms/powernv/pci.c | 7 +------ 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e94a5e3c5a2f..d91bd69d3196 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -200,6 +200,7 @@ extern int iommu_take_ownership(struct iommu_table *tbl); extern void iommu_release_ownership(struct iommu_table *tbl); extern enum dma_data_direction iommu_tce_direction(unsigned long tce); +extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir); #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0019c80aa81d..ac2f959adf9a 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -866,6 +866,21 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, } } +unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) +{ + switch (dir) { + case DMA_BIDIRECTIONAL: + return TCE_PCI_READ | TCE_PCI_WRITE; + case DMA_FROM_DEVICE: + return TCE_PCI_WRITE; + case DMA_TO_DEVICE: + return TCE_PCI_READ; + default: + return 0; + } +} +EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); + #ifdef CONFIG_IOMMU_API /* * SPAPR TCE API diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 3c35487e9778..1477422b907a 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -576,15 +576,10 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs, bool rm) { - u64 proto_tce; + u64 proto_tce = iommu_direction_to_tce_perm(direction); __be64 *tcep, *tces; u64 rpn; - proto_tce = TCE_PCI_READ; // Read allowed - - if (direction != DMA_TO_DEVICE) - proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; rpn = __pa(uaddr) >> tbl->it_page_shift; -- cgit v1.2.3 From da004c3600f52e4f05017f60970e5010978006bc Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:06 +1000 Subject: powerpc/iommu: Move tce_xxx callbacks from ppc_md to iommu_table This adds a iommu_table_ops struct and puts pointer to it into the iommu_table struct. This moves tce_build/tce_free/tce_get/tce_flush callbacks from ppc_md to the new struct where they really belong to. This adds the requirement for @it_ops to be initialized before calling iommu_init_table() to make sure that we do not leave any IOMMU table with iommu_table_ops uninitialized. This is not a parameter of iommu_init_table() though as there will be cases when iommu_init_table() will not be called on TCE tables, for example - VFIO. This does s/tce_build/set/, s/tce_free/clear/ and removes "tce_" redundant prefixes. This removes tce_xxx_rm handlers from ppc_md but does not add them to iommu_table_ops as this will be done later if we decide to support TCE hypercalls in real mode. This removes _vm callbacks as only virtual mode is supported by now so this also removes @rm parameter. For pSeries, this always uses tce_buildmulti_pSeriesLP/ tce_buildmulti_pSeriesLP. 
This changes multi callback to fall back to tce_build_pSeriesLP/tce_free_pSeriesLP if FW_FEATURE_MULTITCE is not present. The reason for this is we still have to support "multitce=off" boot parameter in disable_multitce() and we do not want to walk through all IOMMU tables in the system and replace "multi" callbacks with single ones. For powernv, this defines _ops per PHB type which are P5IOC2/IODA1/IODA2. This makes the callbacks for them public. Later patches will extend callbacks for IODA1/2. No change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 17 +++++++++++ arch/powerpc/include/asm/machdep.h | 25 --------------- arch/powerpc/kernel/iommu.c | 46 ++++++++++++++-------------- arch/powerpc/kernel/vio.c | 5 +++ arch/powerpc/platforms/cell/iommu.c | 8 +++-- arch/powerpc/platforms/pasemi/iommu.c | 7 +++-- arch/powerpc/platforms/powernv/pci-ioda.c | 14 +++++++++ arch/powerpc/platforms/powernv/pci-p5ioc2.c | 7 +++++ arch/powerpc/platforms/powernv/pci.c | 47 +++++------------------------ arch/powerpc/platforms/powernv/pci.h | 5 +++ arch/powerpc/platforms/pseries/iommu.c | 34 ++++++++++++--------- arch/powerpc/sysdev/dart_iommu.c | 12 +++++--- 12 files changed, 116 insertions(+), 111 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index d91bd69d3196..e2a45c37dba8 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -44,6 +44,22 @@ extern int iommu_is_off; extern int iommu_force_on; +struct iommu_table_ops { + int (*set)(struct iommu_table *tbl, + long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs); + void (*clear)(struct iommu_table *tbl, + long index, long npages); + unsigned long (*get)(struct iommu_table *tbl, long index); + void (*flush)(struct iommu_table *tbl); +}; + +/* These are used by VIO */ +extern struct iommu_table_ops iommu_table_lpar_multi_ops; +extern struct iommu_table_ops iommu_table_pseries_ops; + /* * IOMAP_MAX_ORDER defines the largest contiguous block * of dma space we can get. 
IOMAP_MAX_ORDER = 13 @@ -78,6 +94,7 @@ struct iommu_table { #ifdef CONFIG_IOMMU_API struct iommu_group *it_group; #endif + struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); #ifdef CONFIG_PPC_POWERNV void *data; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 0d387d0570cd..952579f5e79a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -65,31 +65,6 @@ struct machdep_calls { * destroyed as well */ void (*hpte_clear_all)(void); - int (*tce_build)(struct iommu_table *tbl, - long index, - long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs); - void (*tce_free)(struct iommu_table *tbl, - long index, - long npages); - unsigned long (*tce_get)(struct iommu_table *tbl, - long index); - void (*tce_flush)(struct iommu_table *tbl); - - /* _rm versions are for real mode use only */ - int (*tce_build_rm)(struct iommu_table *tbl, - long index, - long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs); - void (*tce_free_rm)(struct iommu_table *tbl, - long index, - long npages); - void (*tce_flush_rm)(struct iommu_table *tbl); - void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, unsigned long flags, void *caller); void (*iounmap)(volatile void __iomem *token); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ac2f959adf9a..c0e67e945c95 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -322,11 +322,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, ret = entry << tbl->it_page_shift; /* Set the return dma address */ /* Put the TCEs in the HW table */ - build_fail = ppc_md.tce_build(tbl, entry, npages, + build_fail = tbl->it_ops->set(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK(tbl), direction, attrs); - /* ppc_md.tce_build() only returns non-zero for transient errors. + /* tbl->it_ops->set() only returns non-zero for transient errors. * Clean up the table bitmap in this case and return * DMA_ERROR_CODE. For all other errors the functionality is * not altered. @@ -337,8 +337,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, } /* Flush/invalidate TLB caches if necessary */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); /* Make sure updates are seen by hardware */ mb(); @@ -408,7 +408,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, if (!iommu_free_check(tbl, dma_addr, npages)) return; - ppc_md.tce_free(tbl, entry, npages); + tbl->it_ops->clear(tbl, entry, npages); spin_lock_irqsave(&(pool->lock), flags); bitmap_clear(tbl->it_map, free_entry, npages); @@ -424,8 +424,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, * not do an mb() here on purpose, it is not needed on any of * the current platforms. 
*/ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); } int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, @@ -495,7 +495,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, npages, entry, dma_addr); /* Insert into HW table */ - build_fail = ppc_md.tce_build(tbl, entry, npages, + build_fail = tbl->it_ops->set(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK(tbl), direction, attrs); if(unlikely(build_fail)) @@ -534,8 +534,8 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Flush/invalidate TLB caches if necessary */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); DBG("mapped %d elements:\n", outcount); @@ -600,8 +600,8 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, * do not do an mb() here, the affected platforms do not need it * when freeing. */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); } static void iommu_table_clear(struct iommu_table *tbl) @@ -613,17 +613,17 @@ static void iommu_table_clear(struct iommu_table *tbl) */ if (!is_kdump_kernel() || is_fadump_active()) { /* Clear the table in case firmware left allocations in it */ - ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); + tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size); return; } #ifdef CONFIG_CRASH_DUMP - if (ppc_md.tce_get) { + if (tbl->it_ops->get) { unsigned long index, tceval, tcecount = 0; /* Reserve the existing mappings left by the first kernel. */ for (index = 0; index < tbl->it_size; index++) { - tceval = ppc_md.tce_get(tbl, index + tbl->it_offset); + tceval = tbl->it_ops->get(tbl, index + tbl->it_offset); /* * Freed TCE entry contains 0x7fffffffffffffff on JS20 */ @@ -657,6 +657,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) unsigned int i; struct iommu_pool *p; + BUG_ON(!tbl->it_ops); + /* number of bytes needed for the bitmap */ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long); @@ -929,8 +931,8 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction); void iommu_flush_tce(struct iommu_table *tbl) { /* Flush/invalidate TLB caches if necessary */ - if (ppc_md.tce_flush) - ppc_md.tce_flush(tbl); + if (tbl->it_ops->flush) + tbl->it_ops->flush(tbl); /* Make sure updates are seen by hardware */ mb(); @@ -941,7 +943,7 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce_value, unsigned long npages) { - /* ppc_md.tce_free() does not support any value but 0 */ + /* tbl->it_ops->clear() does not support any value but 0 */ if (tce_value) return -EINVAL; @@ -989,9 +991,9 @@ unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) spin_lock(&(pool->lock)); - oldtce = ppc_md.tce_get(tbl, entry); + oldtce = tbl->it_ops->get(tbl, entry); if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) - ppc_md.tce_free(tbl, entry, 1); + tbl->it_ops->clear(tbl, entry, 1); else oldtce = 0; @@ -1014,10 +1016,10 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, spin_lock(&(pool->lock)); - oldtce = ppc_md.tce_get(tbl, entry); + oldtce = tbl->it_ops->get(tbl, entry); /* Add new entry if it is not busy */ if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) - ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL); + ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL); spin_unlock(&(pool->lock)); diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 
5bfdab9047be..b41426c60ef6 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1196,6 +1196,11 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) tbl->it_type = TCE_VB; tbl->it_blocksize = 16; + if (firmware_has_feature(FW_FEATURE_LPAR)) + tbl->it_ops = &iommu_table_lpar_multi_ops; + else + tbl->it_ops = &iommu_table_pseries_ops; + return iommu_init_table(tbl, -1); } diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 21b502398bf3..14a582b21274 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -466,6 +466,11 @@ static inline u32 cell_iommu_get_ioid(struct device_node *np) return *ioid; } +static struct iommu_table_ops cell_iommu_ops = { + .set = tce_build_cell, + .clear = tce_free_cell +}; + static struct iommu_window * __init cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, unsigned long offset, unsigned long size, @@ -492,6 +497,7 @@ cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, window->table.it_offset = (offset >> window->table.it_page_shift) + pte_offset; window->table.it_size = size >> window->table.it_page_shift; + window->table.it_ops = &cell_iommu_ops; iommu_init_table(&window->table, iommu->nid); @@ -1201,8 +1207,6 @@ static int __init cell_iommu_init(void) /* Setup various callbacks */ cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; ppc_md.dma_get_required_mask = cell_dma_get_required_mask; - ppc_md.tce_build = tce_build_cell; - ppc_md.tce_free = tce_free_cell; if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) goto bail; diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index b8f567b2ea19..c929644e74a6 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -134,6 +134,10 @@ static void iobmap_free(struct iommu_table *tbl, long index, } } +static struct iommu_table_ops iommu_table_iobmap_ops = { + .set = iobmap_build, + .clear = iobmap_free +}; static void iommu_table_iobmap_setup(void) { @@ -153,6 +157,7 @@ static void iommu_table_iobmap_setup(void) * Should probably be 8 (64 bytes) */ iommu_table_iobmap.it_blocksize = 4; + iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; iommu_init_table(&iommu_table_iobmap, 0); pr_debug(" <- %s\n", __func__); } @@ -252,8 +257,6 @@ void __init iommu_init_early_pasemi(void) pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi; pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; - ppc_md.tce_build = iobmap_build; - ppc_md.tce_free = iobmap_free; set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8070dc9921f2..b0a14c0e7774 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1726,6 +1726,12 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, */ } +static struct iommu_table_ops pnv_ioda1_iommu_ops = { + .set = pnv_tce_build, + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; + static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm) @@ -1770,6 +1776,12 @@ void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); } +static struct iommu_table_ops pnv_ioda2_iommu_ops = { + .set = pnv_tce_build, + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; 
+ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe, unsigned int base, unsigned int segs) @@ -1845,6 +1857,7 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR); } + tbl->it_ops = &pnv_ioda1_iommu_ops; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { @@ -1973,6 +1986,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 8); tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); } + tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 61ae20aa9000..5a387a9132ff 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -83,10 +83,17 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } #endif /* CONFIG_PCI_MSI */ +static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { + .set = pnv_tce_build, + .clear = pnv_tce_free, + .get = pnv_tce_get, +}; + static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { if (phb->p5ioc2.iommu_table.it_map == NULL) { + phb->p5ioc2.iommu_table.it_ops = &pnv_p5ioc2_iommu_ops; iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); iommu_register_group(&phb->p5ioc2.iommu_table, pci_domain_nr(phb->hose->bus), phb->opal_id); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 1477422b907a..2793b9d576e3 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -572,9 +572,9 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; -static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs, bool rm) +int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); __be64 *tcep, *tces; @@ -592,22 +592,12 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, * of flags if that becomes the case */ if (tbl->it_type & TCE_PCI_SWINV_CREATE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); return 0; } -static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, - false); -} - -static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, - bool rm) +void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { __be64 *tcep, *tces; @@ -617,32 +607,14 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, *(tcep++) = cpu_to_be64(0); if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); -} - -static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages) -{ - pnv_tce_free(tbl, index, npages, false); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); } -static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +unsigned long pnv_tce_get(struct iommu_table *tbl, long index) { return ((u64 *)tbl->it_base)[index - tbl->it_offset]; } 
-static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, - enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true); -} - -static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) -{ - pnv_tce_free(tbl, index, npages, true); -} - void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift) @@ -744,11 +716,6 @@ void __init pnv_pci_init(void) pci_devs_phb_init(); /* Configure IOMMU DMA hooks */ - ppc_md.tce_build = pnv_tce_build_vm; - ppc_md.tce_free = pnv_tce_free_vm; - ppc_md.tce_build_rm = pnv_tce_build_rm; - ppc_md.tce_free_rm = pnv_tce_free_rm; - ppc_md.tce_get = pnv_tce_get; set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 714ee3c19854..45a756007ec3 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -197,6 +197,11 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; +extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs); +extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index fe5117bac29a..33f3a855bfd9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -206,7 +206,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; unsigned long flags; - if (npages == 1) { + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } @@ -298,6 +298,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n { u64 rc; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) + return tce_free_pSeriesLP(tbl, tcenum, npages); + rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); if (rc && printk_ratelimit()) { @@ -473,7 +476,6 @@ static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); } - #ifdef CONFIG_PCI static void iommu_table_setparms(struct pci_controller *phb, struct device_node *dn, @@ -559,6 +561,12 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_size = size >> tbl->it_page_shift; } +struct iommu_table_ops iommu_table_pseries_ops = { + .set = tce_build_pSeries, + .clear = tce_free_pSeries, + .get = tce_get_pseries +}; + static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) { struct device_node *dn; @@ -627,6 +635,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pci->phb->node); iommu_table_setparms(pci->phb, dn, tbl); + tbl->it_ops = &iommu_table_pseries_ops; pci->iommu_table = iommu_init_table(tbl, pci->phb->node); iommu_register_group(tbl, pci_domain_nr(bus), 0); @@ -638,6 +647,11 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); } +struct iommu_table_ops iommu_table_lpar_multi_ops = { + .set = tce_buildmulti_pSeriesLP, + .clear = tce_freemulti_pSeriesLP, + .get = 
tce_get_pSeriesLP +}; static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { @@ -672,6 +686,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, ppci->phb->node); iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); + tbl->it_ops = &iommu_table_lpar_multi_ops; ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); iommu_register_group(tbl, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->iommu_table); @@ -699,6 +714,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, phb->node); iommu_table_setparms(phb, dn, tbl); + tbl->it_ops = &iommu_table_pseries_ops; PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, tbl); @@ -1121,6 +1137,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, pci->phb->node); iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); + tbl->it_ops = &iommu_table_lpar_multi_ops; pci->iommu_table = iommu_init_table(tbl, pci->phb->node); iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); pr_debug(" created table: %p\n", pci->iommu_table); @@ -1315,22 +1332,11 @@ void iommu_init_early_pSeries(void) return; if (firmware_has_feature(FW_FEATURE_LPAR)) { - if (firmware_has_feature(FW_FEATURE_MULTITCE)) { - ppc_md.tce_build = tce_buildmulti_pSeriesLP; - ppc_md.tce_free = tce_freemulti_pSeriesLP; - } else { - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; - } - ppc_md.tce_get = tce_get_pSeriesLP; pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; ppc_md.dma_get_required_mask = dma_get_required_mask_pSeriesLP; } else { - ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; - ppc_md.tce_get = tce_get_pseries; pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; } @@ -1348,8 +1354,6 @@ static int __init disable_multitce(char *str) firmware_has_feature(FW_FEATURE_LPAR) && firmware_has_feature(FW_FEATURE_MULTITCE)) { printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); - ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; powerpc_firmware_features &= ~FW_FEATURE_MULTITCE; } return 1; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index d00a5663e312..90bcdfeedf48 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -286,6 +286,12 @@ static int __init dart_init(struct device_node *dart_node) return 0; } +static struct iommu_table_ops iommu_dart_ops = { + .set = dart_build, + .clear = dart_free, + .flush = dart_flush, +}; + static void iommu_table_dart_setup(void) { iommu_table_dart.it_busno = 0; @@ -298,6 +304,7 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_base = (unsigned long)dart_vbase; iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; + iommu_table_dart.it_ops = &iommu_dart_ops; iommu_init_table(&iommu_table_dart, -1); /* Reserve the last page of the DART to avoid possible prefetch @@ -386,11 +393,6 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) if (dart_init(dn) != 0) goto bail; - /* Setup 
low level TCE operations for the core IOMMU code */ - ppc_md.tce_build = dart_build; - ppc_md.tce_free = dart_free; - ppc_md.tce_flush = dart_flush; - /* Setup bypass if supported */ if (dart_is_u4) ppc_md.dma_set_mask = dart_dma_set_mask; -- cgit v1.2.3 From decbda25728ddfbb28b77749d2545028e892ca99 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:07 +1000 Subject: powerpc/powernv/ioda/ioda2: Rework TCE invalidation in tce_build()/tce_free() The pnv_pci_ioda_tce_invalidate() helper invalidates TCE cache. It is supposed to be called on IODA1/2 and not called on p5ioc2. It receives start and end host addresses of TCE table. IODA2 actually needs PCI addresses to invalidate the cache. Those can be calculated from host addresses but since we are going to implement multi-level TCE tables, calculating PCI address from a host address might get either tricky or ugly as TCE table remains flat on PCI bus but not in RAM. This moves pnv_pci_ioda_tce_invalidate() from generic pnv_tce_build/ pnt_tce_free and defines IODA1/2-specific callbacks which call generic ones and do PHB-model-specific TCE cache invalidation. P5IOC2 keeps using generic callbacks as before. This changes pnv_pci_ioda2_tce_invalidate() to receives TCE index and number of pages which are PCI addresses shifted by IOMMU page shift. No change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 81 ++++++++++++++++++++++--------- arch/powerpc/platforms/powernv/pci.c | 17 ++----- 2 files changed, 61 insertions(+), 37 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b0a14c0e7774..f710729a4a35 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1679,18 +1679,19 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, } } -static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, - struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) { + struct pnv_ioda_pe *pe = tbl->data; __be64 __iomem *invalidate = rm ? 
(__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; const unsigned shift = tbl->it_page_shift; - start = __pa(startp); - end = __pa(endp); + start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset); + end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset + + npages - 1); /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ if (tbl->it_busno) { @@ -1726,16 +1727,39 @@ static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, */ } +static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); + + if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + + return ret; +} + +static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); +} + static struct iommu_table_ops pnv_ioda1_iommu_ops = { - .set = pnv_tce_build, - .clear = pnv_tce_free, + .set = pnv_ioda1_tce_build, + .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, }; -static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, - struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) { + struct pnv_ioda_pe *pe = tbl->data; unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1748,10 +1772,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, end = start; /* Figure out the start, end and step */ - inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64)); - start |= (inc << shift); - inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64)); - end |= (inc << shift); + start |= (index << shift); + end |= ((index + npages - 1) << shift); inc = (0x1ull << shift); mb(); @@ -1764,21 +1786,32 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, } } -void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm) +static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) { - struct pnv_ioda_pe *pe = tbl->data; - struct pnv_phb *phb = pe->phb; + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); - if (phb->type == PNV_PHB_IODA1) - pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); - else - pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); + if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); + + return ret; +} + +static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + if (tbl->it_type & TCE_PCI_SWINV_FREE) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } static struct iommu_table_ops pnv_ioda2_iommu_ops = { - .set = pnv_tce_build, - .clear = pnv_tce_free, + .set = pnv_ioda2_tce_build, + .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, }; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2793b9d576e3..f72fc6e7d63d 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ 
b/arch/powerpc/platforms/powernv/pci.c @@ -577,37 +577,28 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, struct dma_attrs *attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); - __be64 *tcep, *tces; + __be64 *tcep; u64 rpn; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; rpn = __pa(uaddr) >> tbl->it_page_shift; while (npages--) *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << tbl->it_page_shift)); - /* Some implementations won't cache invalid TCEs and thus may not - * need that flush. We'll probably turn it_type into a bit mask - * of flags if that becomes the case - */ - if (tbl->it_type & TCE_PCI_SWINV_CREATE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); return 0; } void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep, *tces; + __be64 *tcep; - tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; while (npages--) *(tcep++) = cpu_to_be64(0); - - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, false); } unsigned long pnv_tce_get(struct iommu_table *tbl, long index) -- cgit v1.2.3 From b348aa65297659c310943221ac1d3f4b4491ea44 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:08 +1000 Subject: powerpc/spapr: vfio: Replace iommu_table with iommu_table_group Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. This defines iommu_table_group struct which stores pointers to iommu_group and iommu_table(s). This replaces iommu_table with iommu_table_group where iommu_table was used to identify a group: - iommu_register_group(); - iommudata of generic iommu_group; This removes @data from iommu_table as it_table_group provides same access to pnv_ioda_pe. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_alloc_group() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_free_group() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized. This change is here because those lines had to be changed anyway. This should cause no behavioural change. 
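The ownership chain introduced below is PE -> iommu_table_group -> iommu_table, with the back-pointer it_table_group replacing the old per-table it_group and, on powernv, the void *data field. A condensed sketch of how a table user now reaches its PE (the container_of() pattern appears verbatim in the diff; the helper name pe_of_table is made up here just to frame it):

static struct pnv_ioda_pe *pe_of_table(struct iommu_table *tbl)
{
	/* One iommu_group plus up to IOMMU_TABLE_GROUP_MAX_TABLES
	 * (currently 1) tables hang off each group container. */
	struct iommu_table_group *table_group = tbl->it_table_group;

	/* powernv embeds the group container in the PE, so the PE is
	 * recovered with container_of(); pseries instead allocates the
	 * container separately and stores it in pci_dn::table_group. */
	return container_of(table_group, struct pnv_ioda_pe, table_group);
}
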
Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 19 ++--- arch/powerpc/include/asm/pci-bridge.h | 2 +- arch/powerpc/kernel/iommu.c | 17 ++--- arch/powerpc/platforms/powernv/pci-ioda.c | 55 +++++++------- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 18 +++-- arch/powerpc/platforms/powernv/pci.h | 3 +- arch/powerpc/platforms/pseries/iommu.c | 107 +++++++++++++++++++--------- drivers/vfio/vfio_iommu_spapr_tce.c | 23 +++--- 8 files changed, 152 insertions(+), 92 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e2a45c37dba8..5a7267f47bdb 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -91,14 +91,9 @@ struct iommu_table { struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ -#ifdef CONFIG_IOMMU_API - struct iommu_group *it_group; -#endif + struct iommu_table_group *it_table_group; struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); -#ifdef CONFIG_PPC_POWERNV - void *data; -#endif }; /* Pure 2^n version of get_order */ @@ -129,14 +124,22 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +#define IOMMU_TABLE_GROUP_MAX_TABLES 1 + +struct iommu_table_group { + struct iommu_group *group; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; +}; + #ifdef CONFIG_IOMMU_API -extern void iommu_register_group(struct iommu_table *tbl, + +extern void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num); extern int iommu_add_device(struct device *dev); extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); #else -static inline void iommu_register_group(struct iommu_table *tbl, +static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num) { diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b76cd5682a8c..712add590445 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -199,7 +199,7 @@ struct pci_dn { struct pci_dn *parent; struct pci_controller *phb; /* for pci devices */ - struct iommu_table *iommu_table; /* for phb's or bridges */ + struct iommu_table_group *table_group; /* for phb's or bridges */ struct device_node *node; /* back-pointer to the device_node */ int pci_ext_config_space; /* for pci devices */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c0e67e945c95..719f0485ec79 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -889,11 +889,12 @@ EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); */ static void group_release(void *iommu_data) { - struct iommu_table *tbl = iommu_data; - tbl->it_group = NULL; + struct iommu_table_group *table_group = iommu_data; + + table_group->group = NULL; } -void iommu_register_group(struct iommu_table *tbl, +void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, unsigned long pe_num) { struct iommu_group *grp; @@ -905,8 +906,8 @@ void iommu_register_group(struct iommu_table *tbl, 
PTR_ERR(grp)); return; } - tbl->it_group = grp; - iommu_group_set_iommudata(grp, tbl, group_release); + table_group->group = grp; + iommu_group_set_iommudata(grp, table_group, group_release); name = kasprintf(GFP_KERNEL, "domain%d-pe%lx", pci_domain_number, pe_num); if (!name) @@ -1094,7 +1095,7 @@ int iommu_add_device(struct device *dev) } tbl = get_iommu_table_base(dev); - if (!tbl || !tbl->it_group) { + if (!tbl || !tbl->it_table_group || !tbl->it_table_group->group) { pr_debug("%s: Skipping device %s with no tbl\n", __func__, dev_name(dev)); return 0; @@ -1102,7 +1103,7 @@ int iommu_add_device(struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), - iommu_group_id(tbl->it_group)); + iommu_group_id(tbl->it_table_group->group)); if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n", @@ -1111,7 +1112,7 @@ int iommu_add_device(struct device *dev) return -EINVAL; } - return iommu_group_add_device(tbl->it_group, dev); + return iommu_group_add_device(tbl->it_table_group->group, dev); } EXPORT_SYMBOL_GPL(iommu_add_device); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f710729a4a35..279dadf43d5a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1087,10 +1087,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) return; } - pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), - GFP_KERNEL, hose->node); - pe->tce32_table->data = pe; - /* Associate it with all child devices */ pnv_ioda_setup_same_PE(bus, pe); @@ -1292,11 +1288,12 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe struct iommu_table *tbl; unsigned long addr; int64_t rc; + struct iommu_table_group *table_group; bus = dev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; - tbl = pe->tce32_table; + tbl = pe->table_group.tables[0]; addr = tbl->it_base; opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, @@ -1311,13 +1308,14 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - if (tbl->it_group) { - iommu_group_put(tbl->it_group); - BUG_ON(tbl->it_group); + table_group = tbl->it_table_group; + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); - pe->tce32_table = NULL; + pe->table_group.tables[0] = NULL; } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -1465,10 +1463,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) continue; } - pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), - GFP_KERNEL, hose->node); - pe->tce32_table->data = pe; - /* Put PE to the list */ mutex_lock(&phb->ioda.pe_list_mutex); list_add_tail(&pe->list, &phb->ioda.pe_list); @@ -1603,7 +1597,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev pe = &phb->ioda.pe_array[pdn->pe_number]; WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); /* * Note: iommu_add_device() will fail here as * for physical PE: the device is already added by now; @@ -1637,7 +1631,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) } else { dev_info(&pdev->dev, 
"Using 32-bit DMA via iommu\n"); set_dma_ops(&pdev->dev, &dma_iommu_ops); - set_iommu_table_base(&pdev->dev, pe->tce32_table); + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); } *pdev->dev.dma_mask = dma_mask; return 0; @@ -1671,7 +1665,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { - set_iommu_table_base(&dev->dev, pe->tce32_table); + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); iommu_add_device(&dev->dev); if (dev->subordinate) @@ -1682,7 +1676,8 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = tbl->data; + struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : (__be64 __iomem *)tbl->it_index; @@ -1759,7 +1754,8 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = tbl->data; + struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1835,8 +1831,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = pe->tce32_table; - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + phb->hose->node); + tbl->it_table_group = &pe->table_group; + pe->table_group.tables[0] = tbl; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); /* Grab a 32-bit TCE table */ pe->tce32_seg = base; @@ -1915,7 +1915,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) { - struct pnv_ioda_pe *pe = tbl->data; + struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1949,10 +1950,10 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, pe->tce_bypass_base = 1ull << 59; /* Install set_bypass callback for VFIO */ - pe->tce32_table->set_bypass = pnv_pci_ioda2_set_bypass; + pe->table_group.tables[0]->set_bypass = pnv_pci_ioda2_set_bypass; /* Enable bypass by default */ - pnv_pci_ioda2_set_bypass(pe->tce32_table, true); + pnv_pci_ioda2_set_bypass(pe->table_group.tables[0], true); } static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, @@ -1969,8 +1970,12 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = pe->tce32_table; - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, + phb->hose->node); + tbl->it_table_group = &pe->table_group; + pe->table_group.tables[0] = tbl; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 5a387a9132ff..6e00104093d6 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ 
b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -92,14 +92,16 @@ static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - if (phb->p5ioc2.iommu_table.it_map == NULL) { - phb->p5ioc2.iommu_table.it_ops = &pnv_p5ioc2_iommu_ops; - iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); - iommu_register_group(&phb->p5ioc2.iommu_table, + struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0]; + + if (!tbl->it_map) { + tbl->it_ops = &pnv_p5ioc2_iommu_ops; + iommu_init_table(tbl, phb->hose->node); + iommu_register_group(&phb->p5ioc2.table_group, pci_domain_nr(phb->hose->bus), phb->opal_id); } - set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); + set_iommu_table_base(&pdev->dev, tbl); iommu_add_device(&pdev->dev); } @@ -188,6 +190,12 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, tce_mem, tce_size, 0, IOMMU_PAGE_SHIFT_4K); + /* + * We do not allocate iommu_table as we do not support + * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() + * should not be called for phb->p5ioc2.table_group.tables[0] ever. + */ + phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 45a756007ec3..d8ba34d7af1b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -57,7 +57,7 @@ struct pnv_ioda_pe { /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ int tce32_seg; int tce32_segcount; - struct iommu_table *tce32_table; + struct iommu_table_group table_group; phys_addr_t tce_inval_reg_phys; /* 64-bit TCE bypass region */ @@ -120,6 +120,7 @@ struct pnv_phb { union { struct { struct iommu_table iommu_table; + struct iommu_table_group table_group; } p5ioc2; struct { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33f3a855bfd9..307d704ffaa3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -52,16 +52,51 @@ #include "pseries.h" -static void iommu_pseries_free_table(struct iommu_table *tbl, +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl = NULL; + + table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, + node); + if (!table_group) + goto fail_exit; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); + if (!tbl) + goto fail_exit; + + tbl->it_table_group = table_group; + table_group->tables[0] = tbl; + + return table_group; + +fail_exit: + kfree(table_group); + kfree(tbl); + + return NULL; +} + +static void iommu_pseries_free_group(struct iommu_table_group *table_group, const char *node_name) { + struct iommu_table *tbl; + + if (!table_group) + return; + #ifdef CONFIG_IOMMU_API - if (tbl->it_group) { - iommu_group_put(tbl->it_group); - BUG_ON(tbl->it_group); + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); } #endif + + tbl = table_group->tables[0]; iommu_free_table(tbl, node_name); + + kfree(table_group); } static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, @@ -631,13 +666,13 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) pci->phb->dma_window_size = 0x8000000ul; pci->phb->dma_window_base_cur = 0x8000000ul; - tbl = 
kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms(pci->phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - iommu_register_group(tbl, pci_domain_nr(bus), 0); + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -680,16 +715,17 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) ppci = PCI_DN(pdn); pr_debug(" parent is %s, iommu_table: 0x%p\n", - pdn->full_name, ppci->iommu_table); + pdn->full_name, ppci->table_group); - if (!ppci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - ppci->phb->node); + if (!ppci->table_group) { + ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); + tbl = ppci->table_group->tables[0]; iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); - iommu_register_group(tbl, pci_domain_nr(bus), 0); - pr_debug(" created table: %p\n", ppci->iommu_table); + iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(ppci->table_group, + pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->table_group); } } @@ -711,12 +747,13 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) struct pci_controller *phb = PCI_DN(dn)->phb; pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->node); + PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); + tbl = PCI_DN(dn)->table_group->tables[0]; iommu_table_setparms(phb, dn, tbl); tbl->it_ops = &iommu_table_pseries_ops; - PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); - iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); + iommu_init_table(tbl, phb->node); + iommu_register_group(PCI_DN(dn)->table_group, + pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, tbl); iommu_add_device(&dev->dev); return; @@ -726,11 +763,12 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) * an already allocated iommu table is found and use that. 
*/ - while (dn && PCI_DN(dn) && PCI_DN(dn)->iommu_table == NULL) + while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) dn = dn->parent; if (dn && PCI_DN(dn)) { - set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); + set_iommu_table_base(&dev->dev, + PCI_DN(dn)->table_group->tables[0]); iommu_add_device(&dev->dev); } else printk(KERN_WARNING "iommu: Device %s has no iommu table\n", @@ -1117,7 +1155,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %s\n", dn->full_name); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1133,19 +1171,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pr_debug(" parent is %s\n", pdn->full_name); pci = PCI_DN(pdn); - if (!pci->iommu_table) { - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - pci->phb->node); + if (!pci->table_group) { + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); tbl->it_ops = &iommu_table_lpar_multi_ops; - pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); - pr_debug(" created table: %p\n", pci->iommu_table); + iommu_init_table(tbl, pci->phb->node); + iommu_register_group(pci->table_group, + pci_domain_nr(pci->phb->bus), 0); + pr_debug(" created table: %p\n", pci->table_group); } else { - pr_debug(" found DMA window, table: %p\n", pci->iommu_table); + pr_debug(" found DMA window, table: %p\n", pci->table_group); } - set_iommu_table_base(&dev->dev, pci->iommu_table); + set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); iommu_add_device(&dev->dev); } @@ -1176,7 +1215,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; pdn = pdn->parent) { dma_window = of_get_property(pdn, "ibm,dma-window", NULL); if (dma_window) @@ -1220,7 +1259,7 @@ static u64 dma_get_required_mask_pSeriesLP(struct device *dev) dn = pci_device_to_OF_node(pdev); /* search upwards for ibm,dma-window */ - for (; dn && PCI_DN(dn) && !PCI_DN(dn)->iommu_table; + for (; dn && PCI_DN(dn) && !PCI_DN(dn)->table_group; dn = dn->parent) if (of_get_property(dn, "ibm,dma-window", NULL)) break; @@ -1300,8 +1339,8 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti * the device node. 
*/ remove_ddw(np, false); - if (pci && pci->iommu_table) - iommu_pseries_free_table(pci->iommu_table, + if (pci && pci->table_group) + iommu_pseries_free_group(pci->table_group, np->full_name); spin_lock(&direct_window_list_lock); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index e65bc73cc8a8..c4bc345d64d7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -190,10 +190,11 @@ static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; - WARN_ON(container->tbl && !container->tbl->it_group); + WARN_ON(container->tbl && !container->tbl->it_table_group->group); - if (container->tbl && container->tbl->it_group) - tce_iommu_detach_group(iommu_data, container->tbl->it_group); + if (container->tbl && container->tbl->it_table_group->group) + tce_iommu_detach_group(iommu_data, + container->tbl->it_table_group->group); tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -345,7 +346,7 @@ static long tce_iommu_ioctl(void *iommu_data, if (!tbl) return -ENXIO; - BUG_ON(!tbl->it_group); + BUG_ON(!tbl->it_table_group->group); minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); @@ -433,11 +434,12 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_unlock(&container->lock); return 0; case VFIO_EEH_PE_OP: - if (!container->tbl || !container->tbl->it_group) + if (!container->tbl || !container->tbl->it_table_group->group) return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, - cmd, arg); + return vfio_spapr_iommu_eeh_ioctl( + container->tbl->it_table_group->group, + cmd, arg); } return -ENOTTY; @@ -457,7 +459,8 @@ static int tce_iommu_attach_group(void *iommu_data, iommu_group_id(iommu_group), iommu_group); */ if (container->tbl) { pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->tbl->it_group), + iommu_group_id(container->tbl-> + it_table_group->group), iommu_group_id(iommu_group)); ret = -EBUSY; goto unlock_exit; @@ -491,13 +494,13 @@ static void tce_iommu_detach_group(void *iommu_data, if (tbl != container->tbl) { pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", iommu_group_id(iommu_group), - iommu_group_id(tbl->it_group)); + iommu_group_id(tbl->it_table_group->group)); goto unlock_exit; } if (container->enabled) { pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(tbl->it_group)); + iommu_group_id(tbl->it_table_group->group)); tce_iommu_disable(container); } -- cgit v1.2.3 From 0eaf4defc7c44ed5dd33a03cab12a5f88c9b4b86 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:09 +1000 Subject: powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group So far one TCE table could only be used by one IOMMU group. However IODA2 hardware allows programming the same TCE table address to multiple PE allowing sharing tables. This replaces a single pointer to a group in a iommu_table struct with a linked list of groups which provides the way of invalidating TCE cache for every PE when an actual TCE table is updated. This adds pnv_pci_link_table_and_group() and pnv_pci_unlink_table_and_group() helpers to manage the list. However without VFIO, it is still going to be a single IOMMU group per iommu_table. 
This changes iommu_add_device() to add a device to a first group from the group list of a table as it is only called from the platform init code or PCI bus notifier and at these moments there is only one group per table. This does not change TCE invalidation code to loop through all attached groups in order to simplify this patch and because it is not really needed in most cases. IODA2 is fixed in a later patch. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 8 +- arch/powerpc/kernel/iommu.c | 14 +++- arch/powerpc/platforms/powernv/pci-ioda.c | 45 ++++++---- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 + arch/powerpc/platforms/powernv/pci.c | 75 +++++++++++++++++ arch/powerpc/platforms/powernv/pci.h | 7 ++ arch/powerpc/platforms/pseries/iommu.c | 27 +++++- drivers/vfio/vfio_iommu_spapr_tce.c | 122 ++++++++++++++++++++-------- 8 files changed, 241 insertions(+), 60 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 5a7267f47bdb..44a20ccf06b4 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -91,7 +91,7 @@ struct iommu_table { struct iommu_pool pools[IOMMU_NR_POOLS]; unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ - struct iommu_table_group *it_table_group; + struct list_head it_group_list;/* List of iommu_table_group_link */ struct iommu_table_ops *it_ops; void (*set_bypass)(struct iommu_table *tbl, bool enable); }; @@ -126,6 +126,12 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); #define IOMMU_TABLE_GROUP_MAX_TABLES 1 +struct iommu_table_group_link { + struct list_head next; + struct rcu_head rcu; + struct iommu_table_group *table_group; +}; + struct iommu_table_group { struct iommu_group *group; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 719f0485ec79..be258b2ecb10 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1078,6 +1078,7 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership); int iommu_add_device(struct device *dev) { struct iommu_table *tbl; + struct iommu_table_group_link *tgl; /* * The sysfs entries should be populated before @@ -1095,15 +1096,22 @@ int iommu_add_device(struct device *dev) } tbl = get_iommu_table_base(dev); - if (!tbl || !tbl->it_table_group || !tbl->it_table_group->group) { + if (!tbl) { pr_debug("%s: Skipping device %s with no tbl\n", __func__, dev_name(dev)); return 0; } + tgl = list_first_entry_or_null(&tbl->it_group_list, + struct iommu_table_group_link, next); + if (!tgl) { + pr_debug("%s: Skipping device %s with no group\n", + __func__, dev_name(dev)); + return 0; + } pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), - iommu_group_id(tbl->it_table_group->group)); + iommu_group_id(tgl->table_group->group)); if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n", @@ -1112,7 +1120,7 @@ int iommu_add_device(struct device *dev) return -EINVAL; } - return iommu_group_add_device(tbl->it_table_group->group, dev); + return iommu_group_add_device(tgl->table_group->group, dev); } EXPORT_SYMBOL_GPL(iommu_add_device); diff --git 
a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 279dadf43d5a..3b4130697b05 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1288,7 +1288,6 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe struct iommu_table *tbl; unsigned long addr; int64_t rc; - struct iommu_table_group *table_group; bus = dev->bus; hose = pci_bus_to_host(bus); @@ -1308,14 +1307,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - table_group = tbl->it_table_group; - if (table_group->group) { - iommu_group_put(table_group->group); - BUG_ON(table_group->group); + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + BUG_ON(pe->table_group.group); } iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); free_pages(addr, get_order(TCE32_TABLE_SIZE)); - pe->table_group.tables[0] = NULL; } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -1676,7 +1674,10 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? (__be64 __iomem *)pe->tce_inval_reg_phys : @@ -1754,7 +1755,10 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? 
@@ -1831,12 +1835,10 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->hose->node); - tbl->it_table_group = &pe->table_group; - pe->table_group.tables[0] = tbl; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* Grab a 32-bit TCE table */ pe->tce32_seg = base; @@ -1911,11 +1913,18 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } } static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) { - struct pnv_ioda_pe *pe = container_of(tbl->it_table_group, + struct iommu_table_group_link *tgl = list_first_entry_or_null( + &tbl->it_group_list, struct iommu_table_group_link, + next); + struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1970,12 +1979,10 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; - tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, - phb->hose->node); - tbl->it_table_group = &pe->table_group; - pe->table_group.tables[0] = tbl; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; @@ -2048,6 +2055,10 @@ fail: pe->tce32_seg = -1; if (tce_mem) __free_pages(tce_mem, get_order(tce_table_size)); + if (tbl) { + pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + iommu_free_table(tbl, "pnv"); + } } static void pnv_ioda_setup_dma(struct pnv_phb *phb) diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 6e00104093d6..f80e5a1d6117 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -99,6 +99,9 @@ static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, iommu_init_table(tbl, phb->hose->node); iommu_register_group(&phb->p5ioc2.table_group, pci_domain_nr(phb->hose->bus), phb->opal_id); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + pnv_pci_link_table_and_group(phb->hose->node, 0, + tbl, &phb->p5ioc2.table_group); } set_iommu_table_base(&pdev->dev, tbl); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index f72fc6e7d63d..00f1abd34379 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -606,6 +606,81 @@ unsigned long pnv_tce_get(struct iommu_table *tbl, long index) return ((u64 *)tbl->it_base)[index - tbl->it_offset]; } +struct iommu_table *pnv_pci_table_alloc(int nid) +{ + struct iommu_table *tbl; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + + return tbl; +} + +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), 
GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = tbl; + + return 0; +} + +static void pnv_iommu_table_group_link_free(struct rcu_head *head) +{ + struct iommu_table_group_link *tgl = container_of(head, + struct iommu_table_group_link, rcu); + + kfree(tgl); +} + +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free); + found = true; + break; + } + } + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift) diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d8ba34d7af1b..d37cb4da409f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -210,6 +210,13 @@ int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val); int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); +extern struct iommu_table *pnv_pci_table_alloc(int nid); + +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 307d704ffaa3..10510dea16b3 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) { struct iommu_table_group *table_group = NULL; struct iommu_table *tbl = NULL; + struct iommu_table_group_link *tgl = NULL; table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL, node); @@ -66,12 +68,21 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node) if (!tbl) goto fail_exit; - tbl->it_table_group = table_group; + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + goto fail_exit; + + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + table_group->tables[0] = tbl; return table_group; fail_exit: + kfree(tgl); kfree(table_group); kfree(tbl); @@ -82,18 +93,28 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, const char *node_name) { struct iommu_table *tbl; +#ifdef CONFIG_IOMMU_API + struct iommu_table_group_link *tgl; +#endif if (!table_group) return; + tbl = table_group->tables[0]; #ifdef CONFIG_IOMMU_API + tgl = 
list_first_entry_or_null(&tbl->it_group_list, + struct iommu_table_group_link, next); + + WARN_ON_ONCE(!tgl); + if (tgl) { + list_del_rcu(&tgl->next); + kfree(tgl); + } if (table_group->group) { iommu_group_put(table_group->group); BUG_ON(table_group->group); } #endif - - tbl = table_group->tables[0]; iommu_free_table(tbl, node_name); kfree(table_group); diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index c4bc345d64d7..ffc634a75dba 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -88,7 +88,7 @@ static void decrement_locked_vm(long npages) */ struct tce_container { struct mutex lock; - struct iommu_table *tbl; + struct iommu_group *grp; bool enabled; unsigned long locked_pages; }; @@ -103,13 +103,42 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift) return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; } +static long tce_iommu_find_table(struct tce_container *container, + phys_addr_t ioba, struct iommu_table **ptbl) +{ + long i; + struct iommu_table_group *table_group; + + table_group = iommu_group_get_iommudata(container->grp); + if (!table_group) + return -1; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (tbl) { + unsigned long entry = ioba >> tbl->it_page_shift; + unsigned long start = tbl->it_offset; + unsigned long end = start + tbl->it_size; + + if ((start <= entry) && (entry < end)) { + *ptbl = tbl; + return i; + } + } + } + + return -1; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; unsigned long locked; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl; + struct iommu_table_group *table_group; - if (!container->tbl) + if (!container->grp) return -ENXIO; if (!current->mm) @@ -143,6 +172,11 @@ static int tce_iommu_enable(struct tce_container *container) * as this information is only available from KVM and VFIO is * KVM agnostic. 
*/ + table_group = iommu_group_get_iommudata(container->grp); + if (!table_group) + return -ENODEV; + + tbl = table_group->tables[0]; locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT; ret = try_increment_locked_vm(locked); if (ret) @@ -190,11 +224,10 @@ static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; - WARN_ON(container->tbl && !container->tbl->it_table_group->group); + WARN_ON(container->grp); - if (container->tbl && container->tbl->it_table_group->group) - tce_iommu_detach_group(iommu_data, - container->tbl->it_table_group->group); + if (container->grp) + tce_iommu_detach_group(iommu_data, container->grp); tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -312,9 +345,16 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl; + struct iommu_table_group *table_group; + + if (WARN_ON(!container->grp)) + return -ENXIO; + + table_group = iommu_group_get_iommudata(container->grp); - if (WARN_ON(!tbl)) + tbl = table_group->tables[0]; + if (WARN_ON_ONCE(!tbl)) return -ENXIO; minsz = offsetofend(struct vfio_iommu_spapr_tce_info, @@ -337,17 +377,13 @@ static long tce_iommu_ioctl(void *iommu_data, } case VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl = NULL; unsigned long tce; + long num; if (!container->enabled) return -EPERM; - if (!tbl) - return -ENXIO; - - BUG_ON(!tbl->it_table_group->group); - minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); if (copy_from_user(¶m, (void __user *)arg, minsz)) @@ -360,6 +396,10 @@ static long tce_iommu_ioctl(void *iommu_data, VFIO_DMA_MAP_FLAG_WRITE)) return -EINVAL; + num = tce_iommu_find_table(container, param.iova, &tbl); + if (num < 0) + return -ENXIO; + if ((param.size & ~IOMMU_PAGE_MASK(tbl)) || (param.vaddr & ~IOMMU_PAGE_MASK(tbl))) return -EINVAL; @@ -385,14 +425,12 @@ static long tce_iommu_ioctl(void *iommu_data, } case VFIO_IOMMU_UNMAP_DMA: { struct vfio_iommu_type1_dma_unmap param; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl = NULL; + long num; if (!container->enabled) return -EPERM; - if (WARN_ON(!tbl)) - return -ENXIO; - minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); @@ -406,6 +444,10 @@ static long tce_iommu_ioctl(void *iommu_data, if (param.flags) return -EINVAL; + num = tce_iommu_find_table(container, param.iova, &tbl); + if (num < 0) + return -ENXIO; + if (param.size & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; @@ -434,12 +476,11 @@ static long tce_iommu_ioctl(void *iommu_data, mutex_unlock(&container->lock); return 0; case VFIO_EEH_PE_OP: - if (!container->tbl || !container->tbl->it_table_group->group) + if (!container->grp) return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl( - container->tbl->it_table_group->group, - cmd, arg); + return vfio_spapr_iommu_eeh_ioctl(container->grp, + cmd, arg); } return -ENOTTY; @@ -450,17 +491,15 @@ static int tce_iommu_attach_group(void *iommu_data, { int ret; struct tce_container *container = iommu_data; - struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + struct iommu_table_group *table_group; - BUG_ON(!tbl); mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - if (container->tbl) { + if (container->grp) { pr_warn("tce_vfio: Only one group per IOMMU container is 
allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->tbl-> - it_table_group->group), + iommu_group_id(container->grp), iommu_group_id(iommu_group)); ret = -EBUSY; goto unlock_exit; @@ -473,9 +512,15 @@ static int tce_iommu_attach_group(void *iommu_data, goto unlock_exit; } - ret = iommu_take_ownership(tbl); + table_group = iommu_group_get_iommudata(iommu_group); + if (!table_group) { + ret = -ENXIO; + goto unlock_exit; + } + + ret = iommu_take_ownership(table_group->tables[0]); if (!ret) - container->tbl = tbl; + container->grp = iommu_group; unlock_exit: mutex_unlock(&container->lock); @@ -487,26 +532,31 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group) { struct tce_container *container = iommu_data; - struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + struct iommu_table_group *table_group; + struct iommu_table *tbl; - BUG_ON(!tbl); mutex_lock(&container->lock); - if (tbl != container->tbl) { + if (iommu_group != container->grp) { pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", iommu_group_id(iommu_group), - iommu_group_id(tbl->it_table_group->group)); + iommu_group_id(container->grp)); goto unlock_exit; } if (container->enabled) { pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(tbl->it_table_group->group)); + iommu_group_id(container->grp)); tce_iommu_disable(container); } /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - container->tbl = NULL; + container->grp = NULL; + + table_group = iommu_group_get_iommudata(iommu_group); + BUG_ON(!table_group); + + tbl = table_group->tables[0]; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); iommu_release_ownership(tbl); -- cgit v1.2.3 From f87a88642e660edd8912ad39fe77848c6f9927a2 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:10 +1000 Subject: vfio: powerpc/spapr/iommu/powernv/ioda2: Rework IOMMU ownership control This adds tce_iommu_take_ownership() and tce_iommu_release_ownership which call in a loop iommu_take_ownership()/iommu_release_ownership() for every table on the group. As there is just one now, no change in behaviour is expected. At the moment the iommu_table struct has a set_bypass() which enables/ disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code which calls this callback when external IOMMU users such as VFIO are about to get over a PHB. The set_bypass() callback is not really an iommu_table function but IOMMU/PE function. This introduces a iommu_table_group_ops struct and adds take_ownership()/release_ownership() callbacks to it which are called when an external user takes/releases control over the IOMMU. This replaces set_bypass() with ownership callbacks as it is not necessarily just bypass enabling, it can be something else/more so let's give it more generic name. The callbacks is implemented for IODA2 only. Other platforms (P5IOC2, IODA1) will use the old iommu_take_ownership/iommu_release_ownership API. The following patches will replace iommu_take_ownership/ iommu_release_ownership calls in IODA2 with full IOMMU table release/ create. As we here and touching bypass control, this removes pnv_pci_ioda2_setup_bypass_pe() as it does not do much more compared to pnv_pci_ioda2_set_bypass. This moves tce_bypass_base initialization to pnv_pci_ioda2_setup_dma_pe. 
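
The ownership flow described above can be pictured with a small stand-alone sketch. This is not kernel code: every demo_* name below is invented for illustration, and the real structures are the ones added to arch/powerpc/include/asm/iommu.h and pci-ioda.c in the diff that follows. The point is only to show how a group-level ops table with take_ownership()/release_ownership() replaces the per-table set_bypass() hook.

/*
 * Minimal user-space sketch of the ownership model described above.
 * All demo_* names are illustrative only; the real structures live in
 * arch/powerpc/include/asm/iommu.h.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_table_group;

struct demo_table_group_ops {
	/* platform code -> external user (e.g. VFIO) */
	void (*take_ownership)(struct demo_table_group *tg);
	/* external user -> platform code */
	void (*release_ownership)(struct demo_table_group *tg);
};

struct demo_table_group {
	bool bypass_enabled;
	struct demo_table_group_ops *ops;
};

static void demo_take(struct demo_table_group *tg)
{
	/* an IODA2-like implementation would also disable DMA bypass here */
	tg->bypass_enabled = false;
	printf("ownership taken, bypass off\n");
}

static void demo_release(struct demo_table_group *tg)
{
	tg->bypass_enabled = true;
	printf("ownership released, bypass back on\n");
}

static struct demo_table_group_ops demo_ops = {
	.take_ownership = demo_take,
	.release_ownership = demo_release,
};

int main(void)
{
	struct demo_table_group tg = { .bypass_enabled = true, .ops = &demo_ops };

	/* what a VFIO-like consumer does around exclusive use of the group */
	tg.ops->take_ownership(&tg);
	/* ... external user programs the TCE tables ... */
	tg.ops->release_ownership(&tg);
	return 0;
}

The design point is that bypass enable/disable is a property of the PE, not of a single table, so it belongs behind group-level callbacks rather than a per-iommu_table hook.
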
Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 11 ++++- arch/powerpc/kernel/iommu.c | 12 ------ arch/powerpc/platforms/powernv/pci-ioda.c | 43 ++++++++++++------- drivers/vfio/vfio_iommu_spapr_tce.c | 70 ++++++++++++++++++++++++++++--- 4 files changed, 103 insertions(+), 33 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 44a20ccf06b4..489133cf7c5e 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -93,7 +93,6 @@ struct iommu_table { unsigned long it_page_shift;/* table iommu page size */ struct list_head it_group_list;/* List of iommu_table_group_link */ struct iommu_table_ops *it_ops; - void (*set_bypass)(struct iommu_table *tbl, bool enable); }; /* Pure 2^n version of get_order */ @@ -126,6 +125,15 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); #define IOMMU_TABLE_GROUP_MAX_TABLES 1 +struct iommu_table_group; + +struct iommu_table_group_ops { + /* Switch ownership from platform code to external user (e.g. VFIO) */ + void (*take_ownership)(struct iommu_table_group *table_group); + /* Switch ownership from external user (e.g. VFIO) back to core */ + void (*release_ownership)(struct iommu_table_group *table_group); +}; + struct iommu_table_group_link { struct list_head next; struct rcu_head rcu; @@ -135,6 +143,7 @@ struct iommu_table_group_link { struct iommu_table_group { struct iommu_group *group; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; + struct iommu_table_group_ops *ops; }; #ifdef CONFIG_IOMMU_API diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index be258b2ecb10..e7f81b7399ba 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1047,14 +1047,6 @@ int iommu_take_ownership(struct iommu_table *tbl) memset(tbl->it_map, 0xff, sz); - /* - * Disable iommu bypass, otherwise the user can DMA to all of - * our physical memory via the bypass window instead of just - * the pages that has been explicitly mapped into the iommu - */ - if (tbl->set_bypass) - tbl->set_bypass(tbl, false); - return 0; } EXPORT_SYMBOL_GPL(iommu_take_ownership); @@ -1068,10 +1060,6 @@ void iommu_release_ownership(struct iommu_table *tbl) /* Restore bit#0 set by iommu_init_table() */ if (tbl->it_offset == 0) set_bit(0, tbl->it_map); - - /* The kernel owns the device now, we can restore the iommu bypass */ - if (tbl->set_bypass) - tbl->set_bypass(tbl, true); } EXPORT_SYMBOL_GPL(iommu_release_ownership); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3b4130697b05..17a77522ea0d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1919,13 +1919,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } } -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { - struct iommu_table_group_link *tgl = list_first_entry_or_null( - &tbl->it_group_list, struct iommu_table_group_link, - next); - struct pnv_ioda_pe *pe = container_of(tgl->table_group, - struct pnv_ioda_pe, table_group); uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -1952,19 +1947,31 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable) 
pe->tce_bypass_enabled = enable; } -static void pnv_pci_ioda2_setup_bypass_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +#ifdef CONFIG_IOMMU_API +static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { - /* TVE #1 is selected by PCI address bit 59 */ - pe->tce_bypass_base = 1ull << 59; + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); - /* Install set_bypass callback for VFIO */ - pe->table_group.tables[0]->set_bypass = pnv_pci_ioda2_set_bypass; + iommu_take_ownership(table_group->tables[0]); + pnv_pci_ioda2_set_bypass(pe, false); +} - /* Enable bypass by default */ - pnv_pci_ioda2_set_bypass(pe->table_group.tables[0], true); +static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + + iommu_release_ownership(table_group->tables[0]); + pnv_pci_ioda2_set_bypass(pe, true); } +static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .take_ownership = pnv_ioda2_take_ownership, + .release_ownership = pnv_ioda2_release_ownership, +}; +#endif + static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { @@ -1979,6 +1986,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (WARN_ON(pe->tce32_seg >= 0)) return; + /* TVE #1 is selected by PCI address bit 59 */ + pe->tce_bypass_base = 1ull << 59; + tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); @@ -2033,6 +2043,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; +#endif if (pe->flags & PNV_IODA_PE_DEV) { /* @@ -2047,7 +2060,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Also create a bypass window */ if (!pnv_iommu_bypass_disabled) - pnv_pci_ioda2_setup_bypass_pe(phb, pe); + pnv_pci_ioda2_set_bypass(pe, true); return; fail: diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index ffc634a75dba..9c720de46c33 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -486,6 +486,61 @@ static long tce_iommu_ioctl(void *iommu_data, return -ENOTTY; } +static void tce_iommu_release_ownership(struct tce_container *container, + struct iommu_table_group *table_group) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl) + continue; + + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + if (tbl->it_map) + iommu_release_ownership(tbl); + } +} + +static int tce_iommu_take_ownership(struct tce_container *container, + struct iommu_table_group *table_group) +{ + int i, j, rc = 0; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl || !tbl->it_map) + continue; + + rc = iommu_take_ownership(tbl); + if (rc) { + for (j = 0; j < i; ++j) + iommu_release_ownership( + table_group->tables[j]); + + return rc; + } + } + + return 0; +} + +static void tce_iommu_release_ownership_ddw(struct tce_container *container, + struct iommu_table_group *table_group) +{ + table_group->ops->release_ownership(table_group); +} + +static long tce_iommu_take_ownership_ddw(struct tce_container *container, + struct iommu_table_group *table_group) +{ + 
table_group->ops->take_ownership(table_group); + + return 0; +} + static int tce_iommu_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -518,7 +573,12 @@ static int tce_iommu_attach_group(void *iommu_data, goto unlock_exit; } - ret = iommu_take_ownership(table_group->tables[0]); + if (!table_group->ops || !table_group->ops->take_ownership || + !table_group->ops->release_ownership) + ret = tce_iommu_take_ownership(container, table_group); + else + ret = tce_iommu_take_ownership_ddw(container, table_group); + if (!ret) container->grp = iommu_group; @@ -533,7 +593,6 @@ static void tce_iommu_detach_group(void *iommu_data, { struct tce_container *container = iommu_data; struct iommu_table_group *table_group; - struct iommu_table *tbl; mutex_lock(&container->lock); if (iommu_group != container->grp) { @@ -556,9 +615,10 @@ static void tce_iommu_detach_group(void *iommu_data, table_group = iommu_group_get_iommudata(iommu_group); BUG_ON(!table_group); - tbl = table_group->tables[0]; - tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); - iommu_release_ownership(tbl); + if (!table_group->ops || !table_group->ops->release_ownership) + tce_iommu_release_ownership(container, table_group); + else + tce_iommu_release_ownership_ddw(container, table_group); unlock_exit: mutex_unlock(&container->lock); -- cgit v1.2.3 From b82c75bfbeb5b4c7c23ee09dd6f295c7f06aeb25 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:11 +1000 Subject: powerpc/iommu: Fix IOMMU ownership control functions This adds missing locks in iommu_take_ownership()/ iommu_release_ownership(). This marks all pages busy in iommu_table::it_map in order to catch errors if there is an attempt to use this table while ownership over it is taken. This only clears TCE content if there is no page marked busy in it_map. Clearing must be done outside of the table locks as iommu_clear_tce() called from iommu_clear_tces_and_put_pages() does this. In order to use bitmap_empty(), the existing code clears bit#0 which is set even in an empty table if it is bus-mapped at 0 as iommu_init_table() reserves page#0 to prevent buggy drivers from crashing when allocated page is bus-mapped at zero (which is correct). This restores the bit in the case of failure to bring the it_map to the state it was in when we called iommu_take_ownership(). 
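
A rough user-space model of the it_map handshake this patch describes may help; the pool locking is omitted, a single unsigned long stands in for the real bitmap, and every demo_* name is made up.

/*
 * Self-contained model of the it_map handling described above; the
 * bitmap handling is a simplified stand-in for the kernel's, and the
 * pool locking is left out entirely.
 */
#include <stdio.h>

static unsigned long it_map;	/* one bit per TCE entry, bit set = busy */
static unsigned long it_offset;	/* 0 means entry 0 was reserved at init */

static int demo_take_ownership(void)
{
	/* drop the reservation bit so an otherwise-empty map tests empty */
	if (it_offset == 0)
		it_map &= ~1UL;

	if (it_map != 0) {
		/* table still has live mappings: restore bit #0 and fail */
		if (it_offset == 0)
			it_map |= 1UL;
		return -1;
	}

	/* mark every entry busy so nobody else allocates from this table */
	it_map = ~0UL;
	return 0;
}

static void demo_release_ownership(void)
{
	it_map = 0;
	if (it_offset == 0)
		it_map |= 1UL;	/* re-reserve entry 0, as iommu_init_table() did */
}

int main(void)
{
	it_map = 1UL;		/* only the reserved bit is set */
	printf("take: %d\n", demo_take_ownership());
	demo_release_ownership();
	return 0;
}

The key point is the failure path: if anything other than the reserved entry is still mapped, bit #0 is put back so the bitmap looks exactly as it did before the attempt.
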
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/iommu.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index e7f81b7399ba..0fb88005c3c5 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1035,31 +1035,51 @@ EXPORT_SYMBOL_GPL(iommu_tce_build); int iommu_take_ownership(struct iommu_table *tbl) { - unsigned long sz = (tbl->it_size + 7) >> 3; + unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; + int ret = 0; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + for (i = 0; i < tbl->nr_pools; i++) + spin_lock(&tbl->pools[i].lock); if (tbl->it_offset == 0) clear_bit(0, tbl->it_map); if (!bitmap_empty(tbl->it_map, tbl->it_size)) { pr_err("iommu_tce: it_map is not empty"); - return -EBUSY; + ret = -EBUSY; + /* Restore bit#0 set by iommu_init_table() */ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); + } else { + memset(tbl->it_map, 0xff, sz); } - memset(tbl->it_map, 0xff, sz); + for (i = 0; i < tbl->nr_pools; i++) + spin_unlock(&tbl->pools[i].lock); + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - return 0; + return ret; } EXPORT_SYMBOL_GPL(iommu_take_ownership); void iommu_release_ownership(struct iommu_table *tbl) { - unsigned long sz = (tbl->it_size + 7) >> 3; + unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + for (i = 0; i < tbl->nr_pools; i++) + spin_lock(&tbl->pools[i].lock); memset(tbl->it_map, 0, sz); /* Restore bit#0 set by iommu_init_table() */ if (tbl->it_offset == 0) set_bit(0, tbl->it_map); + + for (i = 0; i < tbl->nr_pools; i++) + spin_unlock(&tbl->pools[i].lock); + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); } EXPORT_SYMBOL_GPL(iommu_release_ownership); -- cgit v1.2.3 From 5780fb04263c16fdb9affd1012d5eb956e0cbcd9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:12 +1000 Subject: powerpc/powernv/ioda2: Move TCE kill register address to PE At the moment the DMA setup code looks for the "ibm,opal-tce-kill" property which contains the TCE kill register address. Writing to this register invalidates TCE cache on IODA/IODA2 hub. This moves the register address from iommu_table to pnv_pnb as this register belongs to PHB and invalidates TCE cache for all tables of all attached PEs. This moves the property reading/remapping code to a helper which is called when DMA is being configured for PE and which does DMA setup for both IODA1 and IODA2. This adds a new pnv_pci_ioda2_tce_invalidate_entire() helper which invalidates cache for the entire table. It should be called after every call to opal_pci_map_pe_dma_window(). It was not required before because there was just a single TCE table and 64bit DMA was handled via bypass window (which has no table so no cache was used) but this is going to change with Dynamic DMA windows (DDW). 
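
For reference, the PE-scoped kill value used by the new pnv_pci_ioda2_tce_invalidate_entire() helper can be worked through in a few lines. The sketch below is user-space only and mocks the register write with a printf; the constant and mask match the helper added in the diff below, while the demo_* name is invented.

/*
 * Worked example of the "invalidate everything for this PE" value
 * written to the TCE kill register by the helper in the diff below.
 */
#include <stdint.h>
#include <stdio.h>

static void demo_tce_invalidate_entire(unsigned pe_number)
{
	/* 01xb in the top bits selects "invalidate TCEs matching this PE#" */
	uint64_t val = (0x4ull << 60) | (pe_number & 0xFF);

	/* the real code does __raw_writeq(cpu_to_be64(val), tce_inval_reg) */
	printf("would write 0x%016llx to the TCE kill register\n",
	       (unsigned long long)val);
}

int main(void)
{
	demo_tce_invalidate_entire(5);	/* prints 0x4000000000000005 */
	return 0;
}
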
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 66 ++++++++++++++++++------------- arch/powerpc/platforms/powernv/pci.h | 7 +++- 2 files changed, 44 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 17a77522ea0d..6bbb2bb35581 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1680,8 +1680,8 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->tce_inval_reg_phys : - (__be64 __iomem *)tbl->it_index; + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; unsigned long start, end, inc; const unsigned shift = tbl->it_page_shift; @@ -1752,6 +1752,19 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; +static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) +{ + /* 01xb - invalidate TCEs that match the specified PE# */ + unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF); + struct pnv_phb *phb = pe->phb; + + if (!phb->ioda.tce_inval_reg) + return; + + mb(); /* Ensure above stores are visible */ + __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); +} + static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { @@ -1762,8 +1775,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct pnv_ioda_pe, table_group); unsigned long start, end, inc; __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->tce_inval_reg_phys : - (__be64 __iomem *)tbl->it_index; + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ @@ -1821,7 +1834,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, { struct page *tce_mem = NULL; - const __be64 *swinvp; struct iommu_table *tbl; unsigned int i; int64_t rc; @@ -1878,20 +1890,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, base << 28, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of P7IOC SW invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. 
- */ - pe->tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, - 8); + if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR); - } + tbl->it_ops = &pnv_ioda1_iommu_ops; iommu_init_table(tbl, phb->hose->node); @@ -1972,12 +1975,24 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = { }; #endif +static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) +{ + const __be64 *swinvp; + + /* OPAL variant of PHB3 invalidated TCEs */ + swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); + if (!swinvp) + return; + + phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp); + phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); +} + static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct page *tce_mem = NULL; void *addr; - const __be64 *swinvp; struct iommu_table *tbl; unsigned int tce_table_size, end; int64_t rc; @@ -2024,23 +2039,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, goto fail; } + pnv_pci_ioda2_tce_invalidate_entire(pe); + /* Setup linux iommu table */ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, IOMMU_PAGE_SHIFT_4K); /* OPAL variant of PHB3 invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (swinvp) { - /* We need a couple more fields -- an address and a data - * to or. Since the bus is only printed out on table free - * errors, and on the first pass the data will be a relative - * bus number, print that out instead. - */ - pe->tce_inval_reg_phys = be64_to_cpup(swinvp); - tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, - 8); + if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - } + tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); #ifdef CONFIG_IOMMU_API @@ -2096,6 +2104,8 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) pr_info("PCI: %d PE# for a total weight of %d\n", phb->ioda.dma_pe_count, phb->ioda.dma_weight); + pnv_pci_ioda_setup_opal_tce_kill(phb); + /* Walk our PE list and configure their DMA segments, hand them * out one base segment plus any residual segments based on * weight diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index d37cb4da409f..792a723c36ec 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -58,7 +58,6 @@ struct pnv_ioda_pe { int tce32_seg; int tce32_segcount; struct iommu_table_group table_group; - phys_addr_t tce_inval_reg_phys; /* 64-bit TCE bypass region */ bool tce_bypass_enabled; @@ -184,6 +183,12 @@ struct pnv_phb { * boot for resource allocation purposes */ struct list_head pe_dma_list; + + /* TCE cache invalidate registers (physical and + * remapped) + */ + phys_addr_t tce_inval_reg_phys; + __be64 __iomem *tce_inval_reg; } ioda; }; -- cgit v1.2.3 From e57080f17da5d50f7b3b9d6c045f28632d71309d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:13 +1000 Subject: powerpc/powernv/ioda2: Add TCE invalidation for all attached groups The iommu_table struct keeps a list of IOMMU groups it is used for. At the moment there is just a single group attached but further patches will add TCE table sharing. When sharing is enabled, TCE cache in each PE needs to be invalidated so does the patch. 
This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan to enable TCE table sharing on PHBs older than IODA2. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 35 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6bbb2bb35581..390863608569 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1765,23 +1766,15 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe) __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); } -static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, - unsigned long index, unsigned long npages, bool rm) +static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, + __be64 __iomem *invalidate, unsigned shift, + unsigned long index, unsigned long npages) { - struct iommu_table_group_link *tgl = list_first_entry_or_null( - &tbl->it_group_list, struct iommu_table_group_link, - next); - struct pnv_ioda_pe *pe = container_of(tgl->table_group, - struct pnv_ioda_pe, table_group); unsigned long start, end, inc; - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : - pe->phb->ioda.tce_inval_reg; - const unsigned shift = tbl->it_page_shift; /* We'll invalidate DMA address in PE scope */ start = 0x2ull << 60; - start |= (pe->pe_number & 0xFF); + start |= (pe_number & 0xFF); end = start; /* Figure out the start, end and step */ @@ -1799,6 +1792,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, } } +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages, bool rm) +{ + struct iommu_table_group_link *tgl; + + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + struct pnv_ioda_pe *pe = container_of(tgl->table_group, + struct pnv_ioda_pe, table_group); + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : + pe->phb->ioda.tce_inval_reg; + + pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, + invalidate, tbl->it_page_shift, + index, npages); + } +} + static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, -- cgit v1.2.3 From c5bb44edee19b2c19221a0b5a68add37ea5733c5 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:14 +1000 Subject: powerpc/powernv: Implement accessor to TCE entry This replaces direct accesses to TCE table with a helper which returns an TCE entry address. This does not make difference now but will when multi-level TCE tables get introduces. No change in behavior is expected. 
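
The accessor pattern this patch introduces is easy to model outside the kernel. In the sketch below, demo_tce() plays the role of pnv_tce(): every read and write of the flat table goes through it, so a future multi-level layout only needs to change that one helper. All demo_* names are illustrative; the real code is in the pci.c diff that follows.

/*
 * Minimal stand-in for the pnv_tce() accessor pattern: all TCE
 * reads/writes go through one helper that returns the entry's address.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_ENTRIES	16

static uint64_t demo_table[DEMO_ENTRIES];	/* flat single-level table */

static uint64_t *demo_tce(long idx)
{
	return &demo_table[idx];	/* a multi-level table would walk here */
}

static void demo_build(long index, long npages, uint64_t proto_tce)
{
	for (long i = 0; i < npages; i++)
		*demo_tce(index + i) = proto_tce | (uint64_t)(index + i) << 12;
}

static void demo_free(long index, long npages)
{
	for (long i = 0; i < npages; i++)
		*demo_tce(index + i) = 0;
}

int main(void)
{
	demo_build(2, 3, 0x3);	/* low bits stand in for permission flags */
	printf("entry 3 = 0x%llx\n", (unsigned long long)*demo_tce(3));
	demo_free(2, 3);
	return 0;
}
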
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 00f1abd34379..a07b83283ccc 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -572,38 +572,46 @@ struct pci_ops pnv_pci_ops = { .write = pnv_pci_write_config, }; +static __be64 *pnv_tce(struct iommu_table *tbl, long idx) +{ + __be64 *tmp = ((__be64 *)tbl->it_base); + + return tmp + idx; +} + int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs) { u64 proto_tce = iommu_direction_to_tce_perm(direction); - __be64 *tcep; - u64 rpn; - - tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; - rpn = __pa(uaddr) >> tbl->it_page_shift; + u64 rpn = __pa(uaddr) >> tbl->it_page_shift; + long i; - while (npages--) - *(tcep++) = cpu_to_be64(proto_tce | - (rpn++ << tbl->it_page_shift)); + for (i = 0; i < npages; i++) { + unsigned long newtce = proto_tce | + ((rpn + i) << tbl->it_page_shift); + unsigned long idx = index - tbl->it_offset + i; + *(pnv_tce(tbl, idx)) = cpu_to_be64(newtce); + } return 0; } void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep; + long i; - tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; + for (i = 0; i < npages; i++) { + unsigned long idx = index - tbl->it_offset + i; - while (npages--) - *(tcep++) = cpu_to_be64(0); + *(pnv_tce(tbl, idx)) = cpu_to_be64(0); + } } unsigned long pnv_tce_get(struct iommu_table *tbl, long index) { - return ((u64 *)tbl->it_base)[index - tbl->it_offset]; + return *(pnv_tce(tbl, index - tbl->it_offset)); } struct iommu_table *pnv_pci_table_alloc(int nid) -- cgit v1.2.3 From 05c6cfb9dce0d13d37e9d007ee6a4af36f1c0a58 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:15 +1000 Subject: powerpc/iommu/powernv: Release replaced TCE At the moment writing new TCE value to the IOMMU table fails with EBUSY if there is a valid entry already. However PAPR specification allows the guest to write new TCE value without clearing it first. Another problem this patch is addressing is the use of pool locks for external IOMMU users such as VFIO. The pool locks are to protect DMA page allocator rather than entries and since the host kernel does not control what pages are in use, there is no point in pool locks and exchange()+put_page(oldtce) is sufficient to avoid possible races. This adds an exchange() callback to iommu_table_ops which does the same thing as set() plus it returns replaced TCE and DMA direction so the caller can release the pages afterwards. The exchange() receives a physical address unlike set() which receives linear mapping address; and returns a physical address as the clear() does. This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement for a platform to have exchange() implemented in order to support VFIO. This replaces iommu_tce_build() and iommu_clear_tce() with a single iommu_tce_xchg(). This makes sure that TCE permission bits are not set in TCE passed to IOMMU API as those are to be calculated by platform code from DMA direction. 
This moves SetPageDirty() to the IOMMU code to make it work for both VFIO ioctl interface in in-kernel TCE acceleration (when it becomes available later). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 22 ++++++++-- arch/powerpc/kernel/iommu.c | 59 +++++++++------------------ arch/powerpc/platforms/powernv/pci-ioda.c | 34 ++++++++++++++++ arch/powerpc/platforms/powernv/pci-p5ioc2.c | 3 ++ arch/powerpc/platforms/powernv/pci.c | 18 +++++++++ arch/powerpc/platforms/powernv/pci.h | 2 + drivers/vfio/vfio_iommu_spapr_tce.c | 63 +++++++++++++++++------------ 7 files changed, 132 insertions(+), 69 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 489133cf7c5e..4636734604d7 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -45,13 +45,29 @@ extern int iommu_is_off; extern int iommu_force_on; struct iommu_table_ops { + /* + * When called with direction==DMA_NONE, it is equal to clear(). + * uaddr is a linear map address. + */ int (*set)(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs); +#ifdef CONFIG_IOMMU_API + /* + * Exchanges existing TCE with new TCE plus direction bits; + * returns old TCE and DMA direction mask. + * @tce is a physical address. + */ + int (*exchange)(struct iommu_table *tbl, + long index, + unsigned long *hpa, + enum dma_data_direction *direction); +#endif void (*clear)(struct iommu_table *tbl, long index, long npages); + /* get() returns a physical address */ unsigned long (*get)(struct iommu_table *tbl, long index); void (*flush)(struct iommu_table *tbl); }; @@ -153,6 +169,8 @@ extern void iommu_register_group(struct iommu_table_group *table_group, extern int iommu_add_device(struct device *dev); extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); +extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, @@ -225,10 +243,6 @@ extern int iommu_tce_clear_param_check(struct iommu_table *tbl, unsigned long npages); extern int iommu_tce_put_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce); -extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, - unsigned long hwaddr, enum dma_data_direction direction); -extern unsigned long iommu_clear_tce(struct iommu_table *tbl, - unsigned long entry); extern void iommu_flush_tce(struct iommu_table *tbl); extern int iommu_take_ownership(struct iommu_table *tbl); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0fb88005c3c5..a8e3490b54e3 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -965,10 +965,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); int iommu_tce_put_param_check(struct iommu_table *tbl, unsigned long ioba, unsigned long tce) { - if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) - return -EINVAL; - - if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ)) + if (tce & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; if (ioba & ~IOMMU_PAGE_MASK(tbl)) @@ -985,44 +982,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl, } 
EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); -unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry) +long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, + unsigned long *hpa, enum dma_data_direction *direction) { - unsigned long oldtce; - struct iommu_pool *pool = get_pool(tbl, entry); + long ret; - spin_lock(&(pool->lock)); + ret = tbl->it_ops->exchange(tbl, entry, hpa, direction); - oldtce = tbl->it_ops->get(tbl, entry); - if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)) - tbl->it_ops->clear(tbl, entry, 1); - else - oldtce = 0; - - spin_unlock(&(pool->lock)); - - return oldtce; -} -EXPORT_SYMBOL_GPL(iommu_clear_tce); - -/* - * hwaddr is a kernel virtual address here (0xc... bazillion), - * tce_build converts it to a physical address. - */ -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, - unsigned long hwaddr, enum dma_data_direction direction) -{ - int ret = -EBUSY; - unsigned long oldtce; - struct iommu_pool *pool = get_pool(tbl, entry); - - spin_lock(&(pool->lock)); - - oldtce = tbl->it_ops->get(tbl, entry); - /* Add new entry if it is not busy */ - if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))) - ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL); - - spin_unlock(&(pool->lock)); + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) + SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT)); /* if (unlikely(ret)) pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", @@ -1031,13 +1000,23 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry, return ret; } -EXPORT_SYMBOL_GPL(iommu_tce_build); +EXPORT_SYMBOL_GPL(iommu_tce_xchg); int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; int ret = 0; + /* + * VFIO does not control TCE entries allocation and the guest + * can write new TCEs on top of existing ones so iommu_tce_build() + * must be able to release old pages. This functionality + * requires exchange() callback defined so if it is not + * implemented, we disallow taking ownership over the table. 
+ */ + if (!tbl->it_ops->exchange) + return -EINVAL; + spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) spin_lock(&tbl->pools[i].lock); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 390863608569..ecbc071a143e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1738,6 +1738,20 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, return ret; } +#ifdef CONFIG_IOMMU_API +static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret && (tbl->it_type & + (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false); + + return ret; +} +#endif + static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, long npages) { @@ -1749,6 +1763,9 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, static struct iommu_table_ops pnv_ioda1_iommu_ops = { .set = pnv_ioda1_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_ioda1_tce_xchg, +#endif .clear = pnv_ioda1_tce_free, .get = pnv_tce_get, }; @@ -1824,6 +1841,20 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, return ret; } +#ifdef CONFIG_IOMMU_API +static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + long ret = pnv_tce_xchg(tbl, index, hpa, direction); + + if (!ret && (tbl->it_type & + (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); + + return ret; +} +#endif + static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, long npages) { @@ -1835,6 +1866,9 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_ioda2_tce_xchg, +#endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, }; diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index f80e5a1d6117..eaec85b63b34 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -85,6 +85,9 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { .set = pnv_tce_build, +#ifdef CONFIG_IOMMU_API + .exchange = pnv_tce_xchg, +#endif .clear = pnv_tce_free, .get = pnv_tce_get, }; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index a07b83283ccc..d465b9c32388 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -598,6 +598,24 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages, return 0; } +#ifdef CONFIG_IOMMU_API +int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *hpa | proto_tce, oldtce; + unsigned long idx = index - tbl->it_offset; + + BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + + oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); + *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); + *direction = iommu_tce_direction(oldtce); + + return 0; +} +#endif + void pnv_tce_free(struct iommu_table *tbl, long index, long npages) { long i; diff --git 
a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 792a723c36ec..8ef2d28aded0 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -207,6 +207,8 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, struct dma_attrs *attrs); extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction); extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 9c720de46c33..a9e2d13c03c0 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data) } static void tce_iommu_unuse_page(struct tce_container *container, - unsigned long oldtce) + unsigned long hpa) { struct page *page; - if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE))) - return; - - page = pfn_to_page(oldtce >> PAGE_SHIFT); - - if (oldtce & TCE_PCI_WRITE) - SetPageDirty(page); - + page = pfn_to_page(hpa >> PAGE_SHIFT); put_page(page); } @@ -255,14 +248,21 @@ static int tce_iommu_clear(struct tce_container *container, struct iommu_table *tbl, unsigned long entry, unsigned long pages) { - unsigned long oldtce; + unsigned long oldhpa; + long ret; + enum dma_data_direction direction; for ( ; pages; --pages, ++entry) { - oldtce = iommu_clear_tce(tbl, entry); - if (!oldtce) + direction = DMA_NONE; + oldhpa = 0; + ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction); + if (ret) + continue; + + if (direction == DMA_NONE) continue; - tce_iommu_unuse_page(container, oldtce); + tce_iommu_unuse_page(container, oldhpa); } return 0; @@ -284,12 +284,13 @@ static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) static long tce_iommu_build(struct tce_container *container, struct iommu_table *tbl, - unsigned long entry, unsigned long tce, unsigned long pages) + unsigned long entry, unsigned long tce, unsigned long pages, + enum dma_data_direction direction) { long i, ret = 0; struct page *page; unsigned long hpa; - enum dma_data_direction direction = iommu_tce_direction(tce); + enum dma_data_direction dirtmp; for (i = 0; i < pages; ++i) { unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; @@ -305,8 +306,8 @@ static long tce_iommu_build(struct tce_container *container, } hpa |= offset; - ret = iommu_tce_build(tbl, entry + i, (unsigned long) __va(hpa), - direction); + dirtmp = direction; + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); if (ret) { tce_iommu_unuse_page(container, hpa); pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", @@ -314,6 +315,10 @@ static long tce_iommu_build(struct tce_container *container, tce, ret); break; } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page(container, hpa); + tce += IOMMU_PAGE_SIZE(tbl); } @@ -378,8 +383,8 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; struct iommu_table *tbl = NULL; - unsigned long tce; long num; + enum dma_data_direction direction; if (!container->enabled) return -EPERM; @@ -405,19 +410,27 @@ static long tce_iommu_ioctl(void *iommu_data, return -EINVAL; /* iova is checked by the IOMMU API */ - tce = param.vaddr; - if (param.flags & 
VFIO_DMA_MAP_FLAG_READ) - tce |= TCE_PCI_READ; - if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) - tce |= TCE_PCI_WRITE; + if (param.flags & VFIO_DMA_MAP_FLAG_READ) { + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + direction = DMA_BIDIRECTIONAL; + else + direction = DMA_TO_DEVICE; + } else { + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + direction = DMA_FROM_DEVICE; + else + return -EINVAL; + } - ret = iommu_tce_put_param_check(tbl, param.iova, tce); + ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr); if (ret) return ret; ret = tce_iommu_build(container, tbl, param.iova >> tbl->it_page_shift, - tce, param.size >> tbl->it_page_shift); + param.vaddr, + param.size >> tbl->it_page_shift, + direction); iommu_flush_tce(tbl); -- cgit v1.2.3 From e5aad1e678746af663e3e3acc7f3501309997f51 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:16 +1000 Subject: powerpc/powernv/ioda2: Rework iommu_table creation This moves iommu_table creation to the beginning to make following changes easier to review. This starts using table parameters from the iommu_table struct. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ecbc071a143e..feaba5ecfa97 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2071,13 +2071,23 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, addr = page_address(tce_mem); memset(addr, 0, tce_table_size); + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, + IOMMU_PAGE_SHIFT_4K); + + tbl->it_ops = &pnv_ioda2_iommu_ops; + iommu_init_table(tbl, phb->hose->node); +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; +#endif + /* * Map TCE table through TVT. The TVE index is the PE number * shifted by 1 bit for 32-bits DMA space. */ rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(addr), - tce_table_size, 0x1000); + pe->pe_number << 1, 1, __pa(tbl->it_base), + tbl->it_size << 3, 1ULL << tbl->it_page_shift); if (rc) { pe_err(pe, "Failed to configure 32-bit TCE table," " err %ld\n", rc); @@ -2086,20 +2096,10 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pnv_pci_ioda2_tce_invalidate_entire(pe); - /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, - IOMMU_PAGE_SHIFT_4K); - /* OPAL variant of PHB3 invalidated TCEs */ if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - tbl->it_ops = &pnv_ioda2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); -#ifdef CONFIG_IOMMU_API - pe->table_group.ops = &pnv_pci_ioda2_ops; -#endif - if (pe->flags & PNV_IODA_PE_DEV) { /* * Setting table base here only for carrying iommu_group -- cgit v1.2.3 From aca6913f555176363cda21518ff79da7469ebd64 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:17 +1000 Subject: powerpc/powernv/ioda2: Introduce helpers to allocate TCE pages This is a part of moving TCE table allocation into an iommu_ops callback to support multiple IOMMU groups per one VFIO container. 
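For orientation, the sizing arithmetic the helpers encapsulate, as a rough sketch only (example_tce_table_bytes is a hypothetical name; the real code derives an allocation order and enforces the power-of-two window size described below):

static unsigned long example_tce_table_bytes(unsigned long window_size,
					     unsigned int page_shift)
{
	unsigned long entries = window_size >> page_shift;

	/* e.g. a 256M window with 4K pages: 65536 TCEs * 8 bytes = 512K */
	return entries * sizeof(__be64);
}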
This moves the code which allocates the actual TCE tables to helpers: pnv_pci_ioda2_table_alloc_pages() and pnv_pci_ioda2_table_free_pages(). These do not allocate/free the iommu_table struct. This enforces window size to be a power of two. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 83 +++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index feaba5ecfa97..e61610cccf6e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -49,6 +50,8 @@ /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) +static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); + static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) { @@ -1313,8 +1316,8 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_group_put(pe->table_group.group); BUG_ON(pe->table_group.group); } + pnv_pci_ioda2_table_free_pages(tbl); iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); - free_pages(addr, get_order(TCE32_TABLE_SIZE)); } static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) @@ -2033,13 +2036,62 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); } -static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, - struct pnv_ioda_pe *pe) +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift) { struct page *tce_mem = NULL; + __be64 *addr; + unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; + + tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); + if (!tce_mem) { + pr_err("Failed to allocate a TCE memory, order=%d\n", order); + return NULL; + } + addr = page_address(tce_mem); + memset(addr, 0, 1UL << (order + PAGE_SHIFT)); + + return addr; +} + +static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, struct iommu_table *tbl) +{ void *addr; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); + const unsigned long tce_table_size = 1UL << table_shift; + + if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) + return -EINVAL; + + /* Allocate TCE table */ + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, table_shift); + if (!addr) + return -ENOMEM; + + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, + page_shift); + + pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", + window_size, tce_table_size, bus_offset); + + return 0; +} + +static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) +{ + if (!tbl->it_size) + return; + + free_pages(tbl->it_base, get_order(tbl->it_size << 3)); +} + +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ struct iommu_table *tbl; - unsigned int tce_table_size, end; int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ @@ -2056,24 +2108,16 @@ static void pnv_pci_ioda2_setup_dma_pe(struct 
pnv_phb *phb, /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; - end = (1 << ilog2(phb->ioda.m32_pci_base)); - tce_table_size = (end / 0x1000) * 8; pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", - end); + phb->ioda.m32_pci_base); - /* Allocate TCE table */ - tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL, - get_order(tce_table_size)); - if (!tce_mem) { - pe_err(pe, "Failed to allocate a 32-bit TCE memory\n"); + /* Setup linux iommu table */ + rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node, + 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, tbl); + if (rc) { + pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); goto fail; } - addr = page_address(tce_mem); - memset(addr, 0, tce_table_size); - - /* Setup linux iommu table */ - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0, - IOMMU_PAGE_SHIFT_4K); tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); @@ -2119,9 +2163,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, fail: if (pe->tce32_seg >= 0) pe->tce32_seg = -1; - if (tce_mem) - __free_pages(tce_mem, get_order(tce_table_size)); if (tbl) { + pnv_pci_ioda2_table_free_pages(tbl); pnv_pci_unlink_table_and_group(tbl, &pe->table_group); iommu_free_table(tbl, "pnv"); } -- cgit v1.2.3 From 43cb60ab7f8c91b9bf5988edc318dee99ec93b9b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:18 +1000 Subject: powerpc/powernv/ioda2: Introduce pnv_pci_ioda2_set_window This is a part of moving DMA window programming to an iommu_ops callback. pnv_pci_ioda2_set_window() takes an iommu_table_group as a first parameter (not pnv_ioda_pe) as it is going to be used as a callback for VFIO DDW code. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 47 +++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e61610cccf6e..552f6ac52560 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1970,6 +1970,43 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, } } +static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + int64_t rc; + const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; + const __u64 win_size = tbl->it_size << tbl->it_page_shift; + + pe_info(pe, "Setting up window %llx..%llx pg=%x\n", + start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); + + /* + * Map TCE table through TVT. The TVE index is the PE number + * shifted by 1 bit for 32-bits DMA space. 
+ */ + rc = opal_pci_map_pe_dma_window(phb->opal_id, + pe->pe_number, + pe->pe_number << 1, + 1, + __pa(tbl->it_base), + tbl->it_size << 3, + IOMMU_PAGE_SIZE(tbl)); + if (rc) { + pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); + return rc; + } + + pnv_pci_link_table_and_group(phb->hose->node, num, + tbl, &pe->table_group); + pnv_pci_ioda2_tce_invalidate_entire(pe); + + return 0; +} + static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { uint16_t window_id = (pe->pe_number << 1 ) + 1; @@ -2125,21 +2162,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pe->table_group.ops = &pnv_pci_ioda2_ops; #endif - /* - * Map TCE table through TVT. The TVE index is the PE number - * shifted by 1 bit for 32-bits DMA space. - */ - rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(tbl->it_base), - tbl->it_size << 3, 1ULL << tbl->it_page_shift); + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); if (rc) { pe_err(pe, "Failed to configure 32-bit TCE table," " err %ld\n", rc); goto fail; } - pnv_pci_ioda2_tce_invalidate_entire(pe); - /* OPAL variant of PHB3 invalidated TCEs */ if (phb->ioda.tce_inval_reg) tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); -- cgit v1.2.3 From bbb845c4bac88d8feffa8945dd28b50849984e30 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:19 +1000 Subject: powerpc/powernv: Implement multilevel TCE tables TCE tables might get too big in case of 4K IOMMU pages and DDW enabled on huge guests (hundreds of GB of RAM) so the kernel might be unable to allocate contiguous chunk of physical memory to store the TCE table. To address this, POWER8 CPU (actually, IODA2) supports multi-level TCE tables, up to 5 levels which splits the table into a tree of smaller subtables. This adds multi-level TCE tables support to pnv_pci_ioda2_table_alloc_pages() and pnv_pci_ioda2_table_free_pages() helpers. 
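A rough feel for the arithmetic (hypothetical helper, mirroring the shift calculation added to pnv_pci_ioda2_table_alloc_pages() in this patch):

/*
 * With one level, a 128GB window of 4K pages needs 2^25 TCEs, i.e. a
 * 256MB physically contiguous table; with two levels each sub-table
 * holds only 2^13 entries, i.e. 64KB allocations.
 */
static unsigned int example_per_level_entries_shift(unsigned int window_shift,
						    unsigned int page_shift,
						    unsigned int levels)
{
	unsigned int entries_shift = window_shift - page_shift;

	return (entries_shift + levels - 1) / levels;
}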
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 2 + arch/powerpc/platforms/powernv/pci-ioda.c | 105 +++++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/pci.c | 13 ++++ 3 files changed, 111 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 4636734604d7..706cfc0b1190 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -96,6 +96,8 @@ struct iommu_pool { struct iommu_table { unsigned long it_busno; /* Bus number this table belongs to */ unsigned long it_size; /* Size of iommu table in entries */ + unsigned long it_indirect_levels; + unsigned long it_level_size; unsigned long it_offset; /* Offset into global table */ unsigned long it_base; /* mapped address of tce table */ unsigned long it_index; /* which iommu table this is */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 552f6ac52560..21d8c61f0b03 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -50,6 +50,9 @@ /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) +#define POWERNV_IOMMU_DEFAULT_LEVELS 1 +#define POWERNV_IOMMU_MAX_LEVELS 5 + static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -1977,6 +1980,8 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, table_group); struct pnv_phb *phb = pe->phb; int64_t rc; + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; const __u64 win_size = tbl->it_size << tbl->it_page_shift; @@ -1991,9 +1996,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, pe->pe_number << 1, - 1, + tbl->it_indirect_levels + 1, __pa(tbl->it_base), - tbl->it_size << 3, + size << 3, IOMMU_PAGE_SIZE(tbl)); if (rc) { pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); @@ -2073,11 +2078,16 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); } -static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift) +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, + unsigned levels, unsigned long limit, + unsigned long *current_offset) { struct page *tce_mem = NULL; - __be64 *addr; + __be64 *addr, *tmp; unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT; + unsigned long allocated = 1UL << (order + PAGE_SHIFT); + unsigned entries = 1UL << (shift - 3); + long i; tce_mem = alloc_pages_node(nid, GFP_KERNEL, order); if (!tce_mem) { @@ -2085,31 +2095,79 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift) return NULL; } addr = page_address(tce_mem); - memset(addr, 0, 1UL << (order + PAGE_SHIFT)); + memset(addr, 0, allocated); + + --levels; + if (!levels) { + *current_offset += allocated; + return addr; + } + + for (i = 0; i < entries; ++i) { + tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, + levels, limit, current_offset); + if (!tmp) + break; + + addr[i] = cpu_to_be64(__pa(tmp) | + TCE_PCI_READ | TCE_PCI_WRITE); + + if (*current_offset >= limit) + break; + } return addr; } 
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned level); + static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, - __u32 page_shift, __u64 window_size, struct iommu_table *tbl) + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl) { void *addr; + unsigned long offset = 0, level_shift; const unsigned window_shift = ilog2(window_size); unsigned entries_shift = window_shift - page_shift; unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); const unsigned long tce_table_size = 1UL << table_shift; + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) + return -EINVAL; + if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) return -EINVAL; + /* Adjust direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + level_shift = entries_shift + 3; + level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + /* Allocate TCE table */ - addr = pnv_pci_ioda2_table_do_alloc_pages(nid, table_shift); + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + levels, tce_table_size, &offset); + + /* addr==NULL means that the first level allocation failed */ if (!addr) return -ENOMEM; + /* + * First level was allocated but some lower level failed as + * we did not allocate as much as we wanted, + * release partially allocated table. + */ + if (offset < tce_table_size) { + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + return -ENOMEM; + } + /* Setup linux iommu table */ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, page_shift); + tbl->it_level_size = 1ULL << (level_shift - 3); + tbl->it_indirect_levels = levels - 1; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); @@ -2117,12 +2175,40 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, return 0; } +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned level) +{ + const unsigned long addr_ul = (unsigned long) addr & + ~(TCE_PCI_READ | TCE_PCI_WRITE); + + if (level) { + long i; + u64 *tmp = (u64 *) addr_ul; + + for (i = 0; i < size; ++i) { + unsigned long hpa = be64_to_cpu(tmp[i]); + + if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) + continue; + + pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, + level - 1); + } + } + + free_pages(addr_ul, get_order(size << 3)); +} + static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) { + const unsigned long size = tbl->it_indirect_levels ? 
+ tbl->it_level_size : tbl->it_size; + if (!tbl->it_size) return; - free_pages(tbl->it_base, get_order(tbl->it_size << 3)); + pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, + tbl->it_indirect_levels); } static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, @@ -2150,7 +2236,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* Setup linux iommu table */ rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node, - 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, tbl); + 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, + POWERNV_IOMMU_DEFAULT_LEVELS, tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); goto fail; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index d465b9c32388..765d8ed558d0 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -575,6 +575,19 @@ struct pci_ops pnv_pci_ops = { static __be64 *pnv_tce(struct iommu_table *tbl, long idx) { __be64 *tmp = ((__be64 *)tbl->it_base); + int level = tbl->it_indirect_levels; + const long shift = ilog2(tbl->it_level_size); + unsigned long mask = (tbl->it_level_size - 1) << (level * shift); + + while (level) { + int n = (idx & mask) >> (level * shift); + unsigned long tce = be64_to_cpu(tmp[n]); + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; + mask >>= shift; + --level; + } return tmp + idx; } -- cgit v1.2.3 From 4793d65d1ac056d92b594d05c6aab3c040d913dd Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:20 +1000 Subject: vfio: powerpc/spapr: powerpc/powernv/ioda: Define and implement DMA windows API This extends iommu_table_group_ops by a set of callbacks to support dynamic DMA windows management. create_table() creates a TCE table with specific parameters. it receives iommu_table_group to know nodeid in order to allocate TCE table memory closer to the PHB. The exact format of allocated multi-level table might be also specific to the PHB model (not the case now though). This callback calculated the DMA window offset on a PCI bus from @num and stores it in a just created table. set_window() sets the window at specified TVT index + @num on PHB. unset_window() unsets the window from specified TVT. This adds a free() callback to iommu_table_ops to free the memory (potentially a tree of tables) allocated for the TCE table. create_table() and free() are supposed to be called once per VFIO container and set_window()/unset_window() are supposed to be called for every group in a container. This adds IOMMU capabilities to iommu_table_group such as default 32bit window parameters and others. This makes use of new values in vfio_iommu_spapr_tce. IODA1/P5IOC2 do not support DDW so they do not advertise pagemasks to the userspace. 
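As a sketch of the intended calling sequence for the new ops (hypothetical caller, error handling trimmed; the actual VFIO user of these callbacks arrives later in the series):

static long example_create_default_window(struct iommu_table_group *table_group)
{
	struct iommu_table *tbl = NULL;
	long ret;

	ret = table_group->ops->create_table(table_group, 0 /* window# */,
					     IOMMU_PAGE_SHIFT_4K,
					     table_group->tce32_size,
					     1 /* levels */, &tbl);
	if (ret)
		return ret;

	ret = table_group->ops->set_window(table_group, 0, tbl);
	if (ret)
		tbl->it_ops->free(tbl);	/* free() tears down the (multi-level) table */

	return ret;
}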
Signed-off-by: Alexey Kardashevskiy Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 19 ++++++ arch/powerpc/platforms/powernv/pci-ioda.c | 96 ++++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 7 ++- drivers/vfio/vfio_iommu_spapr_tce.c | 19 +++--- 4 files changed, 124 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 706cfc0b1190..e5541759a37d 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -70,6 +70,7 @@ struct iommu_table_ops { /* get() returns a physical address */ unsigned long (*get)(struct iommu_table *tbl, long index); void (*flush)(struct iommu_table *tbl); + void (*free)(struct iommu_table *tbl); }; /* These are used by VIO */ @@ -146,6 +147,17 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, struct iommu_table_group; struct iommu_table_group_ops { + long (*create_table)(struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table **ptbl); + long (*set_window)(struct iommu_table_group *table_group, + int num, + struct iommu_table *tblnew); + long (*unset_window)(struct iommu_table_group *table_group, + int num); /* Switch ownership from platform code to external user (e.g. VFIO) */ void (*take_ownership)(struct iommu_table_group *table_group); /* Switch ownership from external user (e.g. VFIO) back to core */ @@ -159,6 +171,13 @@ struct iommu_table_group_link { }; struct iommu_table_group { + /* IOMMU properties */ + __u32 tce32_start; + __u32 tce32_size; + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 max_levels; + struct iommu_group *group; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct iommu_table_group_ops *ops; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 21d8c61f0b03..ee3e643c16bb 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -1870,6 +1871,12 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } +static void pnv_ioda2_table_free(struct iommu_table *tbl) +{ + pnv_pci_ioda2_table_free_pages(tbl); + iommu_free_table(tbl, "pnv"); +} + static struct iommu_table_ops pnv_ioda2_iommu_ops = { .set = pnv_ioda2_tce_build, #ifdef CONFIG_IOMMU_API @@ -1877,6 +1884,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = { #endif .clear = pnv_ioda2_tce_free, .get = pnv_tce_get, + .free = pnv_ioda2_table_free, }; static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, @@ -1947,6 +1955,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, TCE_PCI_SWINV_PAIR); tbl->it_ops = &pnv_ioda1_iommu_ops; + pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; + pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; iommu_init_table(tbl, phb->hose->node); if (pe->flags & PNV_IODA_PE_DEV) { @@ -1985,7 +1995,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; const __u64 win_size = tbl->it_size << tbl->it_page_shift; - pe_info(pe, "Setting up window %llx..%llx pg=%x\n", + pe_info(pe, "Setting up window#%d %llx..%llx 
pg=%x\n", num, start_addr, start_addr + win_size - 1, IOMMU_PAGE_SIZE(tbl)); @@ -1995,7 +2005,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, */ rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, + (pe->pe_number << 1) + num, tbl->it_indirect_levels + 1, __pa(tbl->it_base), size << 3, @@ -2040,7 +2050,67 @@ static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) pe->tce_bypass_enabled = enable; } +static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table *tbl); + +static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + int nid = pe->phb->hose->node; + __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start; + long ret; + struct iommu_table *tbl; + + tbl = pnv_pci_table_alloc(nid); + if (!tbl) + return -ENOMEM; + + ret = pnv_pci_ioda2_table_alloc_pages(nid, + bus_offset, page_shift, window_size, + levels, tbl); + if (ret) { + iommu_free_table(tbl, "pnv"); + return ret; + } + + tbl->it_ops = &pnv_ioda2_iommu_ops; + if (pe->phb->ioda.tce_inval_reg) + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + + *ptbl = tbl; + + return 0; +} + #ifdef CONFIG_IOMMU_API +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + long ret; + + pe_info(pe, "Removing DMA window #%d\n", num); + + ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + (pe->pe_number << 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); + else + pnv_pci_ioda2_tce_invalidate_entire(pe); + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + + return ret; +} + static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, @@ -2060,6 +2130,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .create_table = pnv_pci_ioda2_create_table, + .set_window = pnv_pci_ioda2_set_window, + .unset_window = pnv_pci_ioda2_unset_window, .take_ownership = pnv_ioda2_take_ownership, .release_ownership = pnv_ioda2_release_ownership, }; @@ -2214,7 +2287,7 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { - struct iommu_table *tbl; + struct iommu_table *tbl = NULL; int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ @@ -2224,10 +2297,8 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; - tbl = pnv_pci_table_alloc(phb->hose->node); iommu_register_group(&pe->table_group, phb->hose->global_number, pe->pe_number); - pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); /* The PE will reserve all possible 32-bits space */ pe->tce32_seg = 0; @@ -2235,13 +2306,22 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, phb->ioda.m32_pci_base); /* Setup linux iommu 
table */ - rc = pnv_pci_ioda2_table_alloc_pages(pe->phb->hose->node, - 0, IOMMU_PAGE_SHIFT_4K, phb->ioda.m32_pci_base, - POWERNV_IOMMU_DEFAULT_LEVELS, tbl); + pe->table_group.tce32_start = 0; + pe->table_group.tce32_size = phb->ioda.m32_pci_base; + pe->table_group.max_dynamic_windows_supported = + IOMMU_TABLE_GROUP_MAX_TABLES; + pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; + pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M; + + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, + IOMMU_PAGE_SHIFT_4K, + pe->table_group.tce32_size, + POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); goto fail; } + pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); tbl->it_ops = &pnv_ioda2_iommu_ops; iommu_init_table(tbl, phb->hose->node); diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index eaec85b63b34..f2bdfea3b68d 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -127,6 +127,8 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, u64 phb_id; int64_t rc; static int primary = 1; + struct iommu_table_group *table_group; + struct iommu_table *tbl; pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); @@ -201,7 +203,10 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() * should not be called for phb->p5ioc2.table_group.tables[0] ever. */ - phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; + tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; + table_group = &phb->p5ioc2.table_group; + table_group->tce32_start = tbl->it_offset << tbl->it_page_shift; + table_group->tce32_size = tbl->it_size << tbl->it_page_shift; } void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a9e2d13c03c0..6d919eb4251f 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -135,7 +135,6 @@ static int tce_iommu_enable(struct tce_container *container) { int ret = 0; unsigned long locked; - struct iommu_table *tbl; struct iommu_table_group *table_group; if (!container->grp) @@ -171,13 +170,19 @@ static int tce_iommu_enable(struct tce_container *container) * this is that we cannot tell here the amount of RAM used by the guest * as this information is only available from KVM and VFIO is * KVM agnostic. + * + * So we do not allow enabling a container without a group attached + * as there is no way to know how much we should increment + * the locked_vm counter. 
*/ table_group = iommu_group_get_iommudata(container->grp); if (!table_group) return -ENODEV; - tbl = table_group->tables[0]; - locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT; + if (!table_group->tce32_size) + return -EPERM; + + locked = table_group->tce32_size >> PAGE_SHIFT; ret = try_increment_locked_vm(locked); if (ret) return ret; @@ -350,7 +355,6 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; - struct iommu_table *tbl; struct iommu_table_group *table_group; if (WARN_ON(!container->grp)) @@ -358,8 +362,7 @@ static long tce_iommu_ioctl(void *iommu_data, table_group = iommu_group_get_iommudata(container->grp); - tbl = table_group->tables[0]; - if (WARN_ON_ONCE(!tbl)) + if (!table_group) return -ENXIO; minsz = offsetofend(struct vfio_iommu_spapr_tce_info, @@ -371,8 +374,8 @@ static long tce_iommu_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.dma32_window_start = tbl->it_offset << tbl->it_page_shift; - info.dma32_window_size = tbl->it_size << tbl->it_page_shift; + info.dma32_window_start = table_group->tce32_start; + info.dma32_window_size = table_group->tce32_size; info.flags = 0; if (copy_to_user((void __user *)arg, &info, minsz)) -- cgit v1.2.3 From c035e37b58c75ca216bfd1d5de3c1080ac0022b9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:21 +1000 Subject: powerpc/powernv/ioda2: Use new helpers to do proper cleanup on PE release The existing code programmed TVT#0 with some address and then immediately released that memory. This makes use of pnv_pci_ioda2_unset_window() and pnv_pci_ioda2_set_bypass() which do correct resource release and TVT update. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ee3e643c16bb..510bb9c8a9e1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1288,34 +1288,21 @@ m64_failed: return -EBUSY; } +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num); +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); + static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) { - struct pci_bus *bus; - struct pci_controller *hose; - struct pnv_phb *phb; struct iommu_table *tbl; - unsigned long addr; int64_t rc; - bus = dev->bus; - hose = pci_bus_to_host(bus); - phb = hose->private_data; tbl = pe->table_group.tables[0]; - addr = tbl->it_base; - - opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - pe->pe_number << 1, 1, __pa(addr), - 0, 0x1000); - - rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, - pe->pe_number, - (pe->pe_number << 1) + 1, - pe->tce_bypass_base, - 0); + rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) pe_warn(pe, "OPAL error %ld release DMA window\n", rc); - pnv_pci_unlink_table_and_group(tbl, &pe->table_group); + pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { iommu_group_put(pe->table_group.group); BUG_ON(pe->table_group.group); -- cgit v1.2.3 From 0054719386d96984153ad31d714a8be4ec7eba80 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:22 +1000 Subject: powerpc/iommu/ioda2: Add get_table_size() to 
calculate the size of future table This adds a way for the IOMMU user to know how much a new table will use so it can be accounted in the locked_vm limit before allocation happens. This stores the allocated table size in pnv_pci_ioda2_get_table_size() so the locked_vm counter can be updated correctly when a table is being disposed. This defines an iommu_table_group_ops callback to let VFIO know how much memory will be locked if a table is created. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 5 +++++ arch/powerpc/platforms/powernv/pci-ioda.c | 34 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index e5541759a37d..9d3749287689 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -99,6 +99,7 @@ struct iommu_table { unsigned long it_size; /* Size of iommu table in entries */ unsigned long it_indirect_levels; unsigned long it_level_size; + unsigned long it_allocated_size; unsigned long it_offset; /* Offset into global table */ unsigned long it_base; /* mapped address of tce table */ unsigned long it_index; /* which iommu table this is */ @@ -147,6 +148,10 @@ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, struct iommu_table_group; struct iommu_table_group_ops { + unsigned long (*get_table_size)( + __u32 page_shift, + __u64 window_size, + __u32 levels); long (*create_table)(struct iommu_table_group *table_group, int num, __u32 page_shift, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 510bb9c8a9e1..a7e098dba23d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2074,6 +2074,38 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, } #ifdef CONFIG_IOMMU_API +static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels) +{ + unsigned long bytes = 0; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = entries_shift + 3; + unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift); + unsigned long direct_table_size; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || + (window_size > memory_hotplug_max()) || + !is_power_of_2(window_size)) + return 0; + + /* Calculate a direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + table_shift = entries_shift + 3; + table_shift = max_t(unsigned, table_shift, PAGE_SHIFT); + direct_table_size = 1UL << table_shift; + + for ( ; levels; --levels) { + bytes += _ALIGN_UP(tce_table_size, direct_table_size); + + tce_table_size /= direct_table_size; + tce_table_size <<= 3; + tce_table_size = _ALIGN_UP(tce_table_size, direct_table_size); + } + + return bytes; +} + static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num) { @@ -2117,6 +2149,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .get_table_size = pnv_pci_ioda2_get_table_size, .create_table = pnv_pci_ioda2_create_table, .set_window = pnv_pci_ioda2_set_window, .unset_window = pnv_pci_ioda2_unset_window, @@ -2228,6 +2261,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, 
page_shift); tbl->it_level_size = 1ULL << (level_shift - 3); tbl->it_indirect_levels = levels - 1; + tbl->it_allocated_size = offset; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); -- cgit v1.2.3 From 46d3e1e16294c587a74093b1f5474c1b33b72381 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:23 +1000 Subject: vfio: powerpc/spapr: powerpc/powernv/ioda2: Use DMA windows API in ownership control Before the IOMMU user (VFIO) would take control over the IOMMU table belonging to a specific IOMMU group. This approach did not allow sharing tables between IOMMU groups attached to the same container. This introduces a new IOMMU ownership flavour when the user can not just control the existing IOMMU table but remove/create tables on demand. If an IOMMU implements take/release_ownership() callbacks, this lets the user have full control over the IOMMU group. When the ownership is taken, the platform code removes all the windows so the caller must create them. Before returning the ownership back to the platform code, VFIO unprograms and removes all the tables it created. This changes IODA2's onwership handler to remove the existing table rather than manipulating with the existing one. From now on, iommu_take_ownership() and iommu_release_ownership() are only called from the vfio_iommu_spapr_tce driver. Old-style ownership is still supported allowing VFIO to run on older P5IOC2 and IODA IO controllers. No change in userspace-visible behaviour is expected. Since it recreates TCE tables on each ownership change, related kernel traces will appear more often. This adds a pnv_pci_ioda2_setup_default_config() which is called when PE is being configured at boot time and when the ownership is passed from VFIO to the platform code. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 101 ++++++++++++++++-------------- drivers/vfio/vfio_iommu_spapr_tce.c | 88 +++++++++++++++++++++++++- 2 files changed, 141 insertions(+), 48 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index a7e098dba23d..b9f0f430e249 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2073,6 +2073,49 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, return 0; } +static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = NULL; + long rc; + + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, + IOMMU_PAGE_SHIFT_4K, + pe->table_group.tce32_size, + POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); + if (rc) { + pe_err(pe, "Failed to create 32-bit TCE table, err %ld", + rc); + return rc; + } + + iommu_init_table(tbl, pe->phb->hose->node); + + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + if (rc) { + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", + rc); + pnv_ioda2_table_free(tbl); + return rc; + } + + if (!pnv_iommu_bypass_disabled) + pnv_pci_ioda2_set_bypass(pe, true); + + /* OPAL variant of PHB3 invalidated TCEs */ + if (pe->phb->ioda.tce_inval_reg) + tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); + + /* + * Setting table base here only for carrying iommu_group + * further down to let iommu_add_device() do the job. 
+ * pnv_pci_ioda_dma_dev_setup will override it later anyway. + */ + if (pe->flags & PNV_IODA_PE_DEV) + set_iommu_table_base(&pe->pdev->dev, tbl); + + return 0; +} + #ifdef CONFIG_IOMMU_API static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels) @@ -2134,9 +2177,12 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); + /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */ + struct iommu_table *tbl = pe->table_group.tables[0]; - iommu_take_ownership(table_group->tables[0]); pnv_pci_ioda2_set_bypass(pe, false); + pnv_pci_ioda2_unset_window(&pe->table_group, 0); + pnv_ioda2_table_free(tbl); } static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) @@ -2144,8 +2190,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); - iommu_release_ownership(table_group->tables[0]); - pnv_pci_ioda2_set_bypass(pe, true); + pnv_pci_ioda2_setup_default_config(pe); } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { @@ -2308,7 +2353,6 @@ static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { - struct iommu_table *tbl = NULL; int64_t rc; /* We shouldn't already have a 32-bit DMA associated */ @@ -2333,58 +2377,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, IOMMU_TABLE_GROUP_MAX_TABLES; pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; pe->table_group.pgsizes = SZ_4K | SZ_64K | SZ_16M; - - rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, - IOMMU_PAGE_SHIFT_4K, - pe->table_group.tce32_size, - POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); - if (rc) { - pe_err(pe, "Failed to create 32-bit TCE table, err %ld", rc); - goto fail; - } - pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); - - tbl->it_ops = &pnv_ioda2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); #ifdef CONFIG_IOMMU_API pe->table_group.ops = &pnv_pci_ioda2_ops; #endif - rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + rc = pnv_pci_ioda2_setup_default_config(pe); if (rc) { - pe_err(pe, "Failed to configure 32-bit TCE table," - " err %ld\n", rc); - goto fail; + if (pe->tce32_seg >= 0) + pe->tce32_seg = -1; + return; } - /* OPAL variant of PHB3 invalidated TCEs */ - if (phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - - if (pe->flags & PNV_IODA_PE_DEV) { - /* - * Setting table base here only for carrying iommu_group - * further down to let iommu_add_device() do the job. - * pnv_pci_ioda_dma_dev_setup will override it later anyway. 
- */ - set_iommu_table_base(&pe->pdev->dev, tbl); + if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); - } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) pnv_ioda_setup_bus_dma(pe, pe->pbus); - - /* Also create a bypass window */ - if (!pnv_iommu_bypass_disabled) - pnv_pci_ioda2_set_bypass(pe, true); - - return; -fail: - if (pe->tce32_seg >= 0) - pe->tce32_seg = -1; - if (tbl) { - pnv_pci_ioda2_table_free_pages(tbl); - pnv_pci_unlink_table_and_group(tbl, &pe->table_group); - iommu_free_table(tbl, "pnv"); - } } static void pnv_ioda_setup_dma(struct pnv_phb *phb) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 6d919eb4251f..203caacf2242 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -333,6 +333,45 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_create_table(struct tce_container *container, + struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table **ptbl) +{ + long ret, table_size; + + table_size = table_group->ops->get_table_size(page_shift, window_size, + levels); + if (!table_size) + return -EINVAL; + + ret = try_increment_locked_vm(table_size >> PAGE_SHIFT); + if (ret) + return ret; + + ret = table_group->ops->create_table(table_group, num, + page_shift, window_size, levels, ptbl); + + WARN_ON(!ret && !(*ptbl)->it_ops->free); + WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + + if (ret) + decrement_locked_vm(table_size >> PAGE_SHIFT); + + return ret; +} + +static void tce_iommu_free_table(struct iommu_table *tbl) +{ + unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; + + tbl->it_ops->free(tbl); + decrement_locked_vm(pages); +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -546,15 +585,62 @@ static int tce_iommu_take_ownership(struct tce_container *container, static void tce_iommu_release_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { + long i; + + if (!table_group->ops->unset_window) { + WARN_ON_ONCE(1); + return; + } + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + /* Store table pointer as unset_window resets it */ + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl) + continue; + + table_group->ops->unset_window(table_group, i); + tce_iommu_clear(container, tbl, + tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + } + table_group->ops->release_ownership(table_group); } static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { + long ret; + struct iommu_table *tbl = NULL; + + if (!table_group->ops->create_table || !table_group->ops->set_window || + !table_group->ops->release_ownership) { + WARN_ON_ONCE(1); + return -EFAULT; + } + table_group->ops->take_ownership(table_group); - return 0; + ret = tce_iommu_create_table(container, + table_group, + 0, /* window number */ + IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, + 1, /* default levels */ + &tbl); + if (!ret) { + ret = table_group->ops->set_window(table_group, 0, tbl); + if (ret) + tce_iommu_free_table(tbl); + else + table_group->tables[0] = tbl; + } + + if (ret) + table_group->ops->release_ownership(table_group); + + return ret; } static int tce_iommu_attach_group(void *iommu_data, -- cgit v1.2.3 From 
15b244a88e1b2895605be4300b40b575345bcf50 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:24 +1000 Subject: powerpc/mmu: Add userspace-to-physical addresses translation cache We are adding support for DMA memory pre-registration to be used in conjunction with VFIO. The idea is that the userspace which is going to run a guest may want to pre-register a user space memory region so it all gets pinned once and never goes away. Having this done, a hypervisor will not have to pin/unpin pages on every DMA map/unmap request. This is going to help with multiple pinning of the same memory. Another use of it is in-kernel real mode (mmu off) acceleration of DMA requests where real-time translation of guest physical to host physical addresses is non-trivial and may fail as Linux PTEs may be temporarily invalid. Also, having cached host physical addresses (compared to just pinning at the start and then walking the page table again on every H_PUT_TCE), we can be sure that the addresses which we put into the TCE table are the ones we already pinned. This adds a list of memory regions to mm_context_t. Each region consists of a header and a list of physical addresses. This adds API to: 1. register/unregister memory regions; 2. do final cleanup (which puts all pre-registered pages); 3. do userspace to physical address translation; 4. manage usage counters; multiple registration of the same memory is allowed (once per container). This implements 2 counters per registered memory region: - @mapped: incremented on every DMA mapping; decremented on unmapping; initialized to 1 when a region is just registered; once it becomes zero, no more mappings are allowed; - @used: incremented on every "register" ioctl; decremented on "unregister"; unregistration is allowed for DMA mapped regions unless it is the very last reference. For the very last reference this checks that the region is still mapped and returns -EBUSY so the userspace gets to know that memory is still pinned and unregistration needs to be retried; @used remains 1. Host physical addresses are stored in a vmalloc'ed array. In order to access these in the real mode (mmu off), there is a real_vmalloc_addr() helper. The in-kernel acceleration patchset will move it from KVM to MMU code.
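As a rough usage sketch of the new API: the mm_iommu_* prototypes in the mmu_context.h hunk below are the ones added by this patch, but the caller example_map_one_page() and its flow are hypothetical, assume CONFIG_SPAPR_TCE_IOMMU kernel context, and compress the whole register -> translate -> map -> unmap -> unregister lifecycle into one simplified function:

#include <asm/mmu_context.h>

/* Hypothetical illustration only; not part of the patch. */
static long example_map_one_page(unsigned long ua, unsigned long npages)
{
        struct mm_iommu_table_group_mem_t *mem;
        unsigned long hpa = 0;
        long ret;

        /* Pin and account the region once; registering the same
         * ua/npages again only bumps the @used counter. */
        ret = mm_iommu_get(ua, npages, &mem);
        if (ret)
                return ret;

        /* Translate a userspace address via the cached hpas[] array. */
        ret = mm_iommu_ua_to_hpa(mem, ua, &hpa);
        if (ret)
                goto put_exit;

        /* Hold @mapped while a TCE entry references this page. */
        if (mm_iommu_mapped_inc(mem)) {
                ret = -ENXIO;
                goto put_exit;
        }

        /* ... program hpa into the TCE table here ... */

        /* When the TCE entry is cleared again: */
        mm_iommu_mapped_dec(mem);

put_exit:
        /* Drops @used; the region is only released when nobody uses it. */
        mm_iommu_put(mem);
        return ret;
}

Initializing @mapped to 1 at registration time is what lets mm_iommu_put() detect outstanding mappings with a single cmpxchg of 1 -> 0, as the mm/mmu_context_iommu.c hunks further below show.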
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu-hash64.h | 3 + arch/powerpc/include/asm/mmu_context.h | 18 ++ arch/powerpc/kernel/setup_64.c | 3 + arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/mmu_context_hash64.c | 6 + arch/powerpc/mm/mmu_context_iommu.c | 316 +++++++++++++++++++++++++++++++++ 6 files changed, 347 insertions(+) create mode 100644 arch/powerpc/mm/mmu_context_iommu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 1da6a81ce541..a82f5347540a 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -536,6 +536,9 @@ typedef struct { /* for 4K PTE fragment support */ void *pte_frag; #endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + struct list_head iommu_group_mem_list; +#endif } mm_context_t; diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 73382eba02dc..3e5184210d9b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -16,6 +16,24 @@ */ extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm); extern void destroy_context(struct mm_struct *mm); +#ifdef CONFIG_SPAPR_TCE_IOMMU +struct mm_iommu_table_group_mem_t; + +extern bool mm_iommu_preregistered(void); +extern long mm_iommu_get(unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem); +extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); +extern void mm_iommu_init(mm_context_t *ctx); +extern void mm_iommu_cleanup(mm_context_t *ctx); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, + unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, + unsigned long entries); +extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa); +extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); +extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); +#endif extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9fe3dcdbfca7..bdcbb716f4d6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -686,6 +686,9 @@ void __init setup_arch(char **cmdline_p) init_mm.brk = klimit; #ifdef CONFIG_PPC_64K_PAGES init_mm.context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&init_mm.context); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 9c8770b5f96f..3eb73a38220d 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o +obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 178876aef40f..4e4efbc2658e 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -88,6 +88,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; 
+#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&mm->context); #endif return 0; } @@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_cleanup(&mm->context); +#endif #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c new file mode 100644 index 000000000000..da6a2168ae9e --- /dev/null +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -0,0 +1,316 @@ +/* + * IOMMU helpers in MMU context. + * + * Copyright (C) 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(mem_list_mutex); + +struct mm_iommu_table_group_mem_t { + struct list_head next; + struct rcu_head rcu; + unsigned long used; + atomic64_t mapped; + u64 ua; /* userspace address */ + u64 entries; /* number of entries in hpas[] */ + u64 *hpas; /* vmalloc'ed */ +}; + +static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, + unsigned long npages, bool incr) +{ + long ret = 0, locked, lock_limit; + + if (!npages) + return 0; + + down_write(&mm->mmap_sem); + + if (incr) { + locked = mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + ret = -ENOMEM; + else + mm->locked_vm += npages; + } else { + if (WARN_ON_ONCE(npages > mm->locked_vm)) + npages = mm->locked_vm; + mm->locked_vm -= npages; + } + + pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", + current->pid, + incr ? '+' : '-', + npages << PAGE_SHIFT, + mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK)); + up_write(&mm->mmap_sem); + + return ret; +} + +bool mm_iommu_preregistered(void) +{ + if (!current || !current->mm) + return false; + + return !list_empty(¤t->mm->context.iommu_group_mem_list); +} +EXPORT_SYMBOL_GPL(mm_iommu_preregistered); + +long mm_iommu_get(unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem) +{ + struct mm_iommu_table_group_mem_t *mem; + long i, j, ret = 0, locked_entries = 0; + struct page *page = NULL; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ++mem->used; + *pmem = mem; + goto unlock_exit; + } + + /* Overlap? 
*/ + if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem->ua + + (mem->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + goto unlock_exit; + } + + } + + ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + if (ret) + goto unlock_exit; + + locked_entries = entries; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) { + ret = -ENOMEM; + goto unlock_exit; + } + + mem->hpas = vzalloc(entries * sizeof(mem->hpas[0])); + if (!mem->hpas) { + kfree(mem); + ret = -ENOMEM; + goto unlock_exit; + } + + for (i = 0; i < entries; ++i) { + if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + 1/* pages */, 1/* iswrite */, &page)) { + for (j = 0; j < i; ++j) + put_page(pfn_to_page( + mem->hpas[j] >> PAGE_SHIFT)); + vfree(mem->hpas); + kfree(mem); + ret = -EFAULT; + goto unlock_exit; + } + + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + + atomic64_set(&mem->mapped, 1); + mem->used = 1; + mem->ua = ua; + mem->entries = entries; + *pmem = mem; + + list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + +unlock_exit: + if (locked_entries && ret) + mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_get); + +static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) +{ + long i; + struct page *page = NULL; + + for (i = 0; i < mem->entries; ++i) { + if (!mem->hpas[i]) + continue; + + page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT); + if (!page) + continue; + + put_page(page); + mem->hpas[i] = 0; + } +} + +static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem) +{ + + mm_iommu_unpin(mem); + vfree(mem->hpas); + kfree(mem); +} + +static void mm_iommu_free(struct rcu_head *head) +{ + struct mm_iommu_table_group_mem_t *mem = container_of(head, + struct mm_iommu_table_group_mem_t, rcu); + + mm_iommu_do_free(mem); +} + +static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) +{ + list_del_rcu(&mem->next); + mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); + call_rcu(&mem->rcu, mm_iommu_free); +} + +long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +{ + long ret = 0; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + if (mem->used == 0) { + ret = -ENOENT; + goto unlock_exit; + } + + --mem->used; + /* There are still users, exit */ + if (mem->used) + goto unlock_exit; + + /* Are there still mappings? 
*/ + if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + ++mem->used; + ret = -EBUSY; + goto unlock_exit; + } + + /* @mapped became 0 so now mappings are disabled, release the region */ + mm_iommu_release(mem); + +unlock_exit: + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_put); + +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, + unsigned long size) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua <= ua) && + (ua + size <= mem->ua + + (mem->entries << PAGE_SHIFT))) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_lookup); + +struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, + unsigned long entries) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_find); + +long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa) +{ + const long entry = (ua - mem->ua) >> PAGE_SHIFT; + u64 *va = &mem->hpas[entry]; + + if (entry >= mem->entries) + return -EFAULT; + + *hpa = *va | (ua & ~PAGE_MASK); + + return 0; +} +EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); + +long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) +{ + if (atomic64_inc_not_zero(&mem->mapped)) + return 0; + + /* Last mm_iommu_put() has been called, no more mappings allowed() */ + return -ENXIO; +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc); + +void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) +{ + atomic64_add_unless(&mem->mapped, -1, 1); +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); + +void mm_iommu_init(mm_context_t *ctx) +{ + INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); +} + +void mm_iommu_cleanup(mm_context_t *ctx) +{ + struct mm_iommu_table_group_mem_t *mem, *tmp; + + list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_del_rcu(&mem->next); + mm_iommu_do_free(mem); + } +} -- cgit v1.2.3 From 2157e7b82f3b81f57bd80cd67cef09ef26e5f74c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:25 +1000 Subject: vfio: powerpc/spapr: Register memory and define IOMMU v2 The existing implementation accounts the whole DMA window in the locked_vm counter. This is going to be worse with multiple containers and huge DMA windows. Also, real-time accounting would requite additional tracking of accounted pages due to the page size difference - IOMMU uses 4K pages and system uses 4K or 64K pages. Another issue is that actual pages pinning/unpinning happens on every DMA map/unmap request. This does not affect the performance much now as we spend way too much time now on switching context between guest/userspace/host but this will start to matter when we add in-kernel DMA map/unmap acceleration. This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU. New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces 2 new ioctls to register/unregister DMA memory - VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - which receive user space address and size of a memory region which needs to be pinned/unpinned and counted in locked_vm. New IOMMU splits physical pages pinning and TCE table update into 2 different operations. 
It requires: 1) guest pages to be registered first 2) consequent map/unmap requests to work only with pre-registered memory. For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. When a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. The new memory registration ioctls are not supported by VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration will require memory to be preregistered in order to work. The accounting is done per the user process. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. In order to support memory pre-registration, we need a way to track the use of every registered memory region and only allow unregistration if a region is not in use anymore. So we need a way to tell from what region the just cleared TCE was from. This adds a userspace view of the TCE table into iommu_table struct. It contains userspace address, one per TCE entry. The table is only allocated when the ownership over an IOMMU group is taken which means it is only used from outside of the powernv code (such as VFIO). As v2 IOMMU supports IODA2 and pre-IODA2 IOMMUs (which do not support DDW API), this creates a default DMA window for IODA2 for consistency. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 31 ++- arch/powerpc/include/asm/iommu.h | 6 + drivers/vfio/vfio_iommu_spapr_tce.c | 501 ++++++++++++++++++++++++++++++------ include/uapi/linux/vfio.h | 27 ++ 4 files changed, 482 insertions(+), 83 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 4c746a7e717a..dcc37e109c68 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -289,10 +289,12 @@ PPC64 sPAPR implementation note This implementation has some specifics: -1) Only one IOMMU group per container is supported as an IOMMU group -represents the minimal entity which isolation can be guaranteed for and -groups are allocated statically, one per a Partitionable Endpoint (PE) +1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per +container is supported as an IOMMU table is allocated at the boot time, +one table per a IOMMU group which is a Partitionable Endpoint (PE) (PE is often a PCI domain but not always). +Newer systems (POWER8 with IODA2) have improved hardware design which allows +to remove this limitation and have multiple IOMMU groups per a VFIO container. 2) The hardware supports so called DMA windows - the PCI address range within which DMA transfer is allowed, any attempt to access address space @@ -439,6 +441,29 @@ The code flow from the example above should be slightly changed: .... +5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/ +VFIO_IOMMU_DISABLE and implements 2 new ioctls: +VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY +(which are unsupported in v1 IOMMU). + +PPC64 paravirtualized guests generate a lot of map/unmap requests, +and the handling of those includes pinning/unpinning pages and updating +mm::locked_vm counter to make sure we do not exceed the rlimit. +The v2 IOMMU splits accounting and pinning into separate operations: + +- VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls +receive a user space address and size of the block to be pinned. 
+Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to +be called with the exact address and size used for registering +the memory block. The userspace is not expected to call these often. +The ranges are stored in a linked list in a VFIO container. + +- VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual +IOMMU table and do not do pinning; instead these check that the userspace +address is from pre-registered range. + +This separation helps in optimizing DMA for guests. + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 9d3749287689..f9957eb4c659 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -112,9 +112,15 @@ struct iommu_table { unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ struct list_head it_group_list;/* List of iommu_table_group_link */ + unsigned long *it_userspace; /* userspace view of the table */ struct iommu_table_ops *it_ops; }; +#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ + ((tbl)->it_userspace ? \ + &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \ + NULL) + /* Pure 2^n version of get_order */ static inline __attribute_const__ int get_iommu_order(unsigned long size, struct iommu_table *tbl) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 203caacf2242..91a32239bd0a 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #define DRIVER_VERSION "0.1" #define DRIVER_AUTHOR "aik@ozlabs.ru" @@ -81,6 +83,11 @@ static void decrement_locked_vm(long npages) * into DMA'ble space using the IOMMU */ +struct tce_iommu_group { + struct list_head next; + struct iommu_group *grp; +}; + /* * The container descriptor supports only a single group per container. 
* Required by the API as the container is not supplied with the IOMMU group @@ -88,11 +95,84 @@ static void decrement_locked_vm(long npages) */ struct tce_container { struct mutex lock; - struct iommu_group *grp; bool enabled; + bool v2; unsigned long locked_pages; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; + struct list_head group_list; }; +static long tce_iommu_unregister_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + struct mm_iommu_table_group_mem_t *mem; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) + return -EINVAL; + + mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + if (!mem) + return -ENOENT; + + return mm_iommu_put(mem); +} + +static long tce_iommu_register_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long entries = size >> PAGE_SHIFT; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || + ((vaddr + size) < vaddr)) + return -EINVAL; + + ret = mm_iommu_get(vaddr, entries, &mem); + if (ret) + return ret; + + container->enabled = true; + + return 0; +} + +static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + unsigned long *uas; + long ret; + + BUG_ON(tbl->it_userspace); + + ret = try_increment_locked_vm(cb >> PAGE_SHIFT); + if (ret) + return ret; + + uas = vzalloc(cb); + if (!uas) { + decrement_locked_vm(cb >> PAGE_SHIFT); + return -ENOMEM; + } + tbl->it_userspace = uas; + + return 0; +} + +static void tce_iommu_userspace_view_free(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + + if (!tbl->it_userspace) + return; + + vfree(tbl->it_userspace); + tbl->it_userspace = NULL; + decrement_locked_vm(cb >> PAGE_SHIFT); +} + static bool tce_page_is_contained(struct page *page, unsigned page_shift) { /* @@ -103,18 +183,18 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift) return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; } +static inline bool tce_groups_attached(struct tce_container *container) +{ + return !list_empty(&container->group_list); +} + static long tce_iommu_find_table(struct tce_container *container, phys_addr_t ioba, struct iommu_table **ptbl) { long i; - struct iommu_table_group *table_group; - - table_group = iommu_group_get_iommudata(container->grp); - if (!table_group) - return -1; for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + struct iommu_table *tbl = container->tables[i]; if (tbl) { unsigned long entry = ioba >> tbl->it_page_shift; @@ -136,9 +216,7 @@ static int tce_iommu_enable(struct tce_container *container) int ret = 0; unsigned long locked; struct iommu_table_group *table_group; - - if (!container->grp) - return -ENXIO; + struct tce_iommu_group *tcegrp; if (!current->mm) return -ESRCH; /* process exited */ @@ -175,7 +253,12 @@ static int tce_iommu_enable(struct tce_container *container) * as there is no way to know how much we should increment * the locked_vm counter. 
*/ - table_group = iommu_group_get_iommudata(container->grp); + if (!tce_groups_attached(container)) + return -ENODEV; + + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); if (!table_group) return -ENODEV; @@ -211,7 +294,7 @@ static void *tce_iommu_open(unsigned long arg) { struct tce_container *container; - if (arg != VFIO_SPAPR_TCE_IOMMU) { + if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) { pr_err("tce_vfio: Wrong IOMMU type\n"); return ERR_PTR(-EINVAL); } @@ -221,18 +304,45 @@ static void *tce_iommu_open(unsigned long arg) return ERR_PTR(-ENOMEM); mutex_init(&container->lock); + INIT_LIST_HEAD_RCU(&container->group_list); + + container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; return container; } +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages); +static void tce_iommu_free_table(struct iommu_table *tbl); + static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp; + long i; - WARN_ON(container->grp); + while (tce_groups_attached(container)) { + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + tce_iommu_detach_group(iommu_data, tcegrp->grp); + } - if (container->grp) - tce_iommu_detach_group(iommu_data, container->grp); + /* + * If VFIO created a table, it was not disposed + * by tce_iommu_detach_group() so do it now. + */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (!tbl) + continue; + + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + } tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -249,6 +359,47 @@ static void tce_iommu_unuse_page(struct tce_container *container, put_page(page); } +static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, + unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem; + + mem = mm_iommu_lookup(tce, size); + if (!mem) + return -EINVAL; + + ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + if (ret) + return -EINVAL; + + *pmem = mem; + + return 0; +} + +static void tce_iommu_unuse_page_v2(struct iommu_table *tbl, + unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + int ret; + unsigned long hpa = 0; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua || !current || !current->mm) + return; + + ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", + __func__, *pua, entry, ret); + if (mem) + mm_iommu_mapped_dec(mem); + + *pua = 0; +} + static int tce_iommu_clear(struct tce_container *container, struct iommu_table *tbl, unsigned long entry, unsigned long pages) @@ -267,6 +418,11 @@ static int tce_iommu_clear(struct tce_container *container, if (direction == DMA_NONE) continue; + if (container->v2) { + tce_iommu_unuse_page_v2(tbl, entry); + continue; + } + tce_iommu_unuse_page(container, oldhpa); } @@ -333,6 +489,64 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_build_v2(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long tce, 
unsigned long pages, + enum dma_data_direction direction) +{ + long i, ret = 0; + struct page *page; + unsigned long hpa; + enum dma_data_direction dirtmp; + + for (i = 0; i < pages; ++i) { + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, + entry + i); + + ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + break; + + page = pfn_to_page(hpa >> PAGE_SHIFT); + if (!tce_page_is_contained(page, tbl->it_page_shift)) { + ret = -EPERM; + break; + } + + /* Preserve offset within IOMMU page */ + hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + dirtmp = direction; + + /* The registered region is being unregistered */ + if (mm_iommu_mapped_inc(mem)) + break; + + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); + if (ret) { + /* dirtmp cannot be DMA_NONE here */ + tce_iommu_unuse_page_v2(tbl, entry + i); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page_v2(tbl, entry + i); + + *pua = tce; + + tce += IOMMU_PAGE_SIZE(tbl); + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + static long tce_iommu_create_table(struct tce_container *container, struct iommu_table_group *table_group, int num, @@ -358,6 +572,12 @@ static long tce_iommu_create_table(struct tce_container *container, WARN_ON(!ret && !(*ptbl)->it_ops->free); WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + if (!ret && container->v2) { + ret = tce_iommu_userspace_view_alloc(*ptbl); + if (ret) + (*ptbl)->it_ops->free(*ptbl); + } + if (ret) decrement_locked_vm(table_size >> PAGE_SHIFT); @@ -368,6 +588,7 @@ static void tce_iommu_free_table(struct iommu_table *tbl) { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; + tce_iommu_userspace_view_free(tbl); tbl->it_ops->free(tbl); decrement_locked_vm(pages); } @@ -383,6 +604,7 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_CHECK_EXTENSION: switch (arg) { case VFIO_SPAPR_TCE_IOMMU: + case VFIO_SPAPR_TCE_v2_IOMMU: ret = 1; break; default: @@ -394,12 +616,15 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; + struct tce_iommu_group *tcegrp; struct iommu_table_group *table_group; - if (WARN_ON(!container->grp)) + if (!tce_groups_attached(container)) return -ENXIO; - table_group = iommu_group_get_iommudata(container->grp); + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); if (!table_group) return -ENXIO; @@ -468,11 +693,18 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - ret = tce_iommu_build(container, tbl, - param.iova >> tbl->it_page_shift, - param.vaddr, - param.size >> tbl->it_page_shift, - direction); + if (container->v2) + ret = tce_iommu_build_v2(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); + else + ret = tce_iommu_build(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); iommu_flush_tce(tbl); @@ -518,7 +750,62 @@ static long tce_iommu_ioctl(void *iommu_data, return ret; } + case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if 
(copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_register_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } + case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_unregister_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } case VFIO_IOMMU_ENABLE: + if (container->v2) + break; + mutex_lock(&container->lock); ret = tce_iommu_enable(container); mutex_unlock(&container->lock); @@ -526,16 +813,27 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_DISABLE: + if (container->v2) + break; + mutex_lock(&container->lock); tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; - case VFIO_EEH_PE_OP: - if (!container->grp) - return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl(container->grp, - cmd, arg); + case VFIO_EEH_PE_OP: { + struct tce_iommu_group *tcegrp; + + ret = 0; + list_for_each_entry(tcegrp, &container->group_list, next) { + ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, + cmd, arg); + if (ret) + return ret; + } + return ret; + } + } return -ENOTTY; @@ -547,14 +845,17 @@ static void tce_iommu_release_ownership(struct tce_container *container, int i; for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + struct iommu_table *tbl = container->tables[i]; if (!tbl) continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_userspace_view_free(tbl); if (tbl->it_map) iommu_release_ownership(tbl); + + container->tables[i] = NULL; } } @@ -569,7 +870,10 @@ static int tce_iommu_take_ownership(struct tce_container *container, if (!tbl || !tbl->it_map) continue; - rc = iommu_take_ownership(tbl); + rc = tce_iommu_userspace_view_alloc(tbl); + if (!rc) + rc = iommu_take_ownership(tbl); + if (rc) { for (j = 0; j < i; ++j) iommu_release_ownership( @@ -579,6 +883,9 @@ static int tce_iommu_take_ownership(struct tce_container *container, } } + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + container->tables[i] = table_group->tables[i]; + return 0; } @@ -592,18 +899,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, return; } - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - /* Store table pointer as unset_window resets it */ - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl) - continue; - + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) table_group->ops->unset_window(table_group, i); - tce_iommu_clear(container, tbl, - tbl->it_offset, tbl->it_size); - tce_iommu_free_table(tbl); - } table_group->ops->release_ownership(table_group); } @@ -611,7 +908,7 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { - long ret; + long i, ret = 0; struct iommu_table *tbl = NULL; if (!table_group->ops->create_table || !table_group->ops->set_window || @@ 
-622,23 +919,45 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, table_group->ops->take_ownership(table_group); - ret = tce_iommu_create_table(container, - table_group, - 0, /* window number */ - IOMMU_PAGE_SHIFT_4K, - table_group->tce32_size, - 1, /* default levels */ - &tbl); - if (!ret) { - ret = table_group->ops->set_window(table_group, 0, tbl); + /* + * If it the first group attached, check if there is + * a default DMA window and create one if none as + * the userspace expects it to exist. + */ + if (!tce_groups_attached(container) && !container->tables[0]) { + ret = tce_iommu_create_table(container, + table_group, + 0, /* window number */ + IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, + 1, /* default levels */ + &tbl); if (ret) - tce_iommu_free_table(tbl); + goto release_exit; else - table_group->tables[0] = tbl; + container->tables[0] = tbl; } - if (ret) - table_group->ops->release_ownership(table_group); + /* Set all windows to the new group */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + tbl = container->tables[i]; + + if (!tbl) + continue; + + /* Set the default window to a new group */ + ret = table_group->ops->set_window(table_group, i, tbl); + if (ret) + goto release_exit; + } + + return 0; + +release_exit: + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + table_group->ops->unset_window(table_group, i); + + table_group->ops->release_ownership(table_group); return ret; } @@ -649,29 +968,44 @@ static int tce_iommu_attach_group(void *iommu_data, int ret; struct tce_container *container = iommu_data; struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp = NULL; mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - if (container->grp) { - pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->grp), - iommu_group_id(iommu_group)); + table_group = iommu_group_get_iommudata(iommu_group); + + if (tce_groups_attached(container) && (!table_group->ops || + !table_group->ops->take_ownership || + !table_group->ops->release_ownership)) { ret = -EBUSY; goto unlock_exit; } - if (container->enabled) { - pr_err("tce_vfio: attaching group #%u to enabled container\n", - iommu_group_id(iommu_group)); - ret = -EBUSY; - goto unlock_exit; + /* Check if new group has the same iommu_ops (i.e. 
compatible) */ + list_for_each_entry(tcegrp, &container->group_list, next) { + struct iommu_table_group *table_group_tmp; + + if (tcegrp->grp == iommu_group) { + pr_warn("tce_vfio: Group %d is already attached\n", + iommu_group_id(iommu_group)); + ret = -EBUSY; + goto unlock_exit; + } + table_group_tmp = iommu_group_get_iommudata(tcegrp->grp); + if (table_group_tmp->ops != table_group->ops) { + pr_warn("tce_vfio: Group %d is incompatible with group %d\n", + iommu_group_id(iommu_group), + iommu_group_id(tcegrp->grp)); + ret = -EPERM; + goto unlock_exit; + } } - table_group = iommu_group_get_iommudata(iommu_group); - if (!table_group) { - ret = -ENXIO; + tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL); + if (!tcegrp) { + ret = -ENOMEM; goto unlock_exit; } @@ -681,10 +1015,15 @@ static int tce_iommu_attach_group(void *iommu_data, else ret = tce_iommu_take_ownership_ddw(container, table_group); - if (!ret) - container->grp = iommu_group; + if (!ret) { + tcegrp->grp = iommu_group; + list_add(&tcegrp->next, &container->group_list); + } unlock_exit: + if (ret && tcegrp) + kfree(tcegrp); + mutex_unlock(&container->lock); return ret; @@ -695,24 +1034,26 @@ static void tce_iommu_detach_group(void *iommu_data, { struct tce_container *container = iommu_data; struct iommu_table_group *table_group; + bool found = false; + struct tce_iommu_group *tcegrp; mutex_lock(&container->lock); - if (iommu_group != container->grp) { - pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", - iommu_group_id(iommu_group), - iommu_group_id(container->grp)); - goto unlock_exit; + + list_for_each_entry(tcegrp, &container->group_list, next) { + if (tcegrp->grp == iommu_group) { + found = true; + break; + } } - if (container->enabled) { - pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(container->grp)); - tce_iommu_disable(container); + if (!found) { + pr_warn("tce_vfio: detaching unattached group #%u\n", + iommu_group_id(iommu_group)); + goto unlock_exit; } - /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", - iommu_group_id(iommu_group), iommu_group); */ - container->grp = NULL; + list_del(&tcegrp->next); + kfree(tcegrp); table_group = iommu_group_get_iommudata(iommu_group); BUG_ON(!table_group); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e4fa1995f613..fa84391a0d00 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -507,6 +509,31 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. 
+ */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From e633bc86a922468a82300eef5b9802e17be5e23d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:26 +1000 Subject: vfio: powerpc/spapr: Support Dynamic DMA windows This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. DDW is added as a capability, not as a SPAPR TCE IOMMU v2 unique feature as we still want to support v2 on platforms which cannot do DDW for the sake of TCE acceleration in KVM (coming soon). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 19 ++++ arch/powerpc/include/asm/iommu.h | 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 196 +++++++++++++++++++++++++++++++++++- include/uapi/linux/vfio.h | 61 ++++++++++- 4 files changed, 273 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index dcc37e109c68..1dd3fddfd3a1 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -464,6 +464,25 @@ address is from pre-registered range. This separation helps in optimizing DMA for guests. +6) sPAPR specification allows guests to have an additional DMA window(s) on +a PCI bus with a variable page size. Two ioctls have been added to support +this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. +The platform has to support the functionality or error will be returned to +the userspace. The existing hardware supports up to 2 DMA windows, one is +2GB long, uses 4K pages and called "default 32bit window"; the other can +be as big as entire RAM, use different page size, it is optional - guests +create those in run-time if the guest driver supports 64bit DMA. + +VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and +a number of TCE table levels (if a TCE table is going to be big enough and +the kernel may not be able to allocate enough of physically contiguous memory). +It creates a new window in the available slot and returns the bus address where +the new window starts. 
Due to hardware limitation, the user space cannot choose +the location of DMA windows. + +VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window +and removes it. + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index f9957eb4c659..ca18cff90900 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -149,7 +149,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); -#define IOMMU_TABLE_GROUP_MAX_TABLES 1 +#define IOMMU_TABLE_GROUP_MAX_TABLES 2 struct iommu_table_group; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 91a32239bd0a..0582b72ef377 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -211,6 +211,18 @@ static long tce_iommu_find_table(struct tce_container *container, return -1; } +static int tce_iommu_find_free_table(struct tce_container *container) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (!container->tables[i]) + return i; + } + + return -ENOSPC; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; @@ -593,11 +605,115 @@ static void tce_iommu_free_table(struct iommu_table *tbl) decrement_locked_vm(pages); } +static long tce_iommu_create_window(struct tce_container *container, + __u32 page_shift, __u64 window_size, __u32 levels, + __u64 *start_addr) +{ + struct tce_iommu_group *tcegrp; + struct iommu_table_group *table_group; + struct iommu_table *tbl = NULL; + long ret, num; + + num = tce_iommu_find_free_table(container); + if (num < 0) + return num; + + /* Get the first group for ops::create_table */ + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + if (!table_group) + return -EFAULT; + + if (!(table_group->pgsizes & (1ULL << page_shift))) + return -EINVAL; + + if (!table_group->ops->set_window || !table_group->ops->unset_window || + !table_group->ops->get_table_size || + !table_group->ops->create_table) + return -EPERM; + + /* Create TCE table */ + ret = tce_iommu_create_table(container, table_group, num, + page_shift, window_size, levels, &tbl); + if (ret) + return ret; + + BUG_ON(!tbl->it_ops->free); + + /* + * Program the table to every group. + * Groups have been tested for compatibility at the attach time. 
+ */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + ret = table_group->ops->set_window(table_group, num, tbl); + if (ret) + goto unset_exit; + } + + container->tables[num] = tbl; + + /* Return start address assigned by platform in create_table() */ + *start_addr = tbl->it_offset << tbl->it_page_shift; + + return 0; + +unset_exit: + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + table_group->ops->unset_window(table_group, num); + } + tce_iommu_free_table(tbl); + + return ret; +} + +static long tce_iommu_remove_window(struct tce_container *container, + __u64 start_addr) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl; + struct tce_iommu_group *tcegrp; + int num; + + num = tce_iommu_find_table(container, start_addr, &tbl); + if (num < 0) + return -EINVAL; + + BUG_ON(!tbl->it_size); + + /* Detach groups from IOMMUs */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + /* + * SPAPR TCE IOMMU exposes the default DMA window to + * the guest via dma32_window_start/size of + * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow + * the userspace to remove this window, some do not so + * here we check for the platform capability. + */ + if (!table_group->ops || !table_group->ops->unset_window) + return -EPERM; + + table_group->ops->unset_window(table_group, num); + } + + /* Free table */ + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + container->tables[num] = NULL; + + return 0; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { struct tce_container *container = iommu_data; - unsigned long minsz; + unsigned long minsz, ddwsz; long ret; switch (cmd) { @@ -641,6 +757,21 @@ static long tce_iommu_ioctl(void *iommu_data, info.dma32_window_start = table_group->tce32_start; info.dma32_window_size = table_group->tce32_size; info.flags = 0; + memset(&info.ddw, 0, sizeof(info.ddw)); + + if (table_group->max_dynamic_windows_supported && + container->v2) { + info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW; + info.ddw.pgsizes = table_group->pgsizes; + info.ddw.max_dynamic_windows_supported = + table_group->max_dynamic_windows_supported; + info.ddw.levels = table_group->max_levels; + } + + ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw); + + if (info.argsz >= ddwsz) + minsz = ddwsz; if (copy_to_user((void __user *)arg, &info, minsz)) return -EFAULT; @@ -834,6 +965,69 @@ static long tce_iommu_ioctl(void *iommu_data, return ret; } + case VFIO_IOMMU_SPAPR_TCE_CREATE: { + struct vfio_iommu_spapr_tce_create create; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_create, + start_addr); + + if (copy_from_user(&create, (void __user *)arg, minsz)) + return -EFAULT; + + if (create.argsz < minsz) + return -EINVAL; + + if (create.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_create_window(container, create.page_shift, + create.window_size, create.levels, + &create.start_addr); + + mutex_unlock(&container->lock); + + if (!ret && copy_to_user((void __user *)arg, &create, minsz)) + ret = -EFAULT; + + return ret; + } + case VFIO_IOMMU_SPAPR_TCE_REMOVE: { + struct vfio_iommu_spapr_tce_remove remove; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + 
return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_remove, + start_addr); + + if (copy_from_user(&remove, (void __user *)arg, minsz)) + return -EFAULT; + + if (remove.argsz < minsz) + return -EINVAL; + + if (remove.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_remove_window(container, remove.start_addr); + + mutex_unlock(&container->lock); + + return ret; + } } return -ENOTTY; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fa84391a0d00..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -444,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + /* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into @@ -454,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. */ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -534,6 +554,41 @@ struct vfio_iommu_spapr_register_memory { */ #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. 
+ */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From b91c1e3e7a6f22a6b898e345b745b6a43273c973 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:25 +1000 Subject: powerpc: Fix duplicate const clang warning in user access code We see a large number of duplicate const errors in the user access code when building with llvm/clang: include/linux/pagemap.h:576:8: warning: duplicate 'const' declaration specifier [-Wduplicate-decl-specifier] ret = __get_user(c, uaddr); The problem is we are doing const __typeof__(*(ptr)), which will hit the warning if ptr is marked const. Removing const does not seem to have any effect on GCC code generation. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a0c071d24e0e..2a8ebae0936b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -265,7 +265,7 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -279,7 +279,7 @@ do { \ ({ \ long __gu_err; \ long long __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -293,7 +293,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ unsigned long __gu_val = 0; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ @@ -305,7 +305,7 @@ do { \ ({ \ long __gu_err; \ unsigned long __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ -- cgit v1.2.3 From 1fb3f5a7ca599f322e6bf21272ad215301159aa0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:26 +1000 Subject: powerpc: Only use -mabi=altivec if toolchain supports it The -mabi=altivec option is not recognised on LLVM, so use call cc-option to check for support. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 2 +- lib/raid6/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 7902802a19a5..a47e14277fd8 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -33,6 +33,6 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o -CFLAGS_xor_vmx.o += -maltivec -mabi=altivec +CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index c7dab0645554..3b10a48fa040 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -15,7 +15,7 @@ quiet_cmd_unroll = UNROLL $@ < $< > $@ || ( rm -f $@ && exit 1 ) ifeq ($(CONFIG_ALTIVEC),y) -altivec_flags := -maltivec -mabi=altivec +altivec_flags := -maltivec $(call cc-option,-mabi=altivec) endif # The GCC option -ffreestanding is required in order to compile code containing -- cgit v1.2.3 From a50a862e74a9d7b447df30c33aa3d534ce3a977a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:27 +1000 Subject: powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it These options are not recognised on LLVM, so use call cc-option to check for support. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index edf39cebfed8..799e67d54c4d 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -113,7 +113,7 @@ else endif endif -CFLAGS-$(CONFIG_PPC64) := -mtraceback=no +CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) @@ -160,7 +160,8 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr) KBUILD_AFLAGS += -Iarch/$(ARCH) $(AFLAGS-y) -KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y) +KBUILD_CFLAGS += $(call cc-option,-msoft-float) +KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__ @@ -192,7 +193,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) # Never use string load/store instructions as they are # often slow when they are implemented at all -KBUILD_CFLAGS += -mno-string +KBUILD_CFLAGS += $(call cc-option,-mno-string) ifeq ($(CONFIG_6xx),y) KBUILD_CFLAGS += -mcpu=powerpc -- cgit v1.2.3 From 92d6cf2dab83cfb00b3d80fd38a38a77c6873277 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:28 +1000 Subject: powerpc: Don't use -mno-strict-align on clang We added -mno-strict-align in commit f036b3681962 (powerpc: Work around little endian gcc bug) to fix gcc bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57134 Clang doesn't understand it. We need to use a conditional because we can't use the simpler call cc-option here. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 799e67d54c4d..1eec1a0462b6 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -66,7 +66,10 @@ endif UTS_MACHINE := $(OLDARCH) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override CC += -mlittle-endian -mno-strict-align +override CC += -mlittle-endian +ifneq ($(COMPILER),clang) +override CC += -mno-strict-align +endif override AS += -mlittle-endian override LD += -EL override CROSS32CC += -mlittle-endian -- cgit v1.2.3 From 238abecde8ad43f914e095fcf23e0bd35dc7a7f2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 26 May 2015 08:53:29 +1000 Subject: powerpc: Don't use gcc specific options on clang We have code to choose between several options, eg. -mabi=elfv2 vs -mcall-aixdesc, and -mcmodel=medium vs -mminimal-toc. But these are all GCC specific, so use cc-option on all of them. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1eec1a0462b6..05f464eb6952 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -118,12 +118,12 @@ endif CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc) +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else -CFLAGS-$(CONFIG_PPC64) += -mcall-aixdesc +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) endif -CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc) +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) -- cgit v1.2.3 From 4bece972fce6e597cb513bdcae4a04e14fc0dd81 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 15 Jun 2015 15:01:32 +1000 Subject: powerpc/powernv: pnv_init_idle_states() should only run on powernv Although this init call checks for device tree properties before doing anything, it should still only run on powernv machines. Reviewed-by: Shreyas B Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index bd39a120bd60..59d735d2e5c0 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -289,5 +290,4 @@ out_free: out: return 0; } - -subsys_initcall(pnv_init_idle_states); +machine_subsys_initcall(powernv, pnv_init_idle_states); -- cgit v1.2.3 From 8f6b9512ceadc6bd52777c299111dc642b4c65b6 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:05:49 -0400 Subject: powerpc: use device_initcall for registering rtc devices Currently these two RTC devices are in core platform code where it is not possible for them to be modular. It will never be modular, so using module_init as an alias for __initcall can be somewhat misleading. 
Fix this up now, so that we can relocate module_init from init.h into module.h in the future. If we don't do this, we'd have to add module.h to obviously non-modular code, and that would be a worse thing. Note that direct use of __initcall is discouraged, vs. one of the priority categorized subgroups. As __initcall gets mapped onto device_initcall, our use of device_initcall directly in this change means that the runtime impact is zero -- they will remain at level 6 in initcall ordering. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geoff Levand Acked-by: Geoff Levand Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker --- arch/powerpc/kernel/time.c | 2 +- arch/powerpc/platforms/ps3/time.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 56f44848b044..43922509a483 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1124,4 +1124,4 @@ static int __init rtc_init(void) return PTR_ERR_OR_ZERO(pdev); } -module_init(rtc_init); +device_initcall(rtc_init); diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index ce73ce865613..791c6142c4a7 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -92,5 +92,4 @@ static int __init ps3_rtc_init(void) return PTR_ERR_OR_ZERO(pdev); } - -module_init(ps3_rtc_init); +device_initcall(ps3_rtc_init); -- cgit v1.2.3 From a390a2f18147533359d4e45cb13438d42580da84 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:05:49 -0400 Subject: powerpc: don't use module_init in non-modular 83xx suspend code The suspend.o is built for SUSPEND -- which is bool, and hence this code is either present or absent. It will never be modular, so using module_init as an alias for __initcall can be somewhat misleading. Fix this up now, so that we can relocate module_init from init.h into module.h in the future. If we don't do this, we'd have to add module.h to obviously non-modular code, and that would be a worse thing. Note that direct use of __initcall is discouraged, vs. one of the priority categorized subgroups. As __initcall gets mapped onto device_initcall, our use of device_initcall directly in this change means that the runtime impact is zero -- it will remain at level 6 in initcall ordering. Cc: Scott Wood Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker --- arch/powerpc/platforms/83xx/suspend.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index c9adbfb65006..fcbea4b51a78 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -445,5 +445,4 @@ static int pmc_init(void) { return platform_driver_register(&pmc_driver); } - -module_init(pmc_init); +device_initcall(pmc_init); -- cgit v1.2.3 From 383d14a5365879bc193d29ad2ed17ac5299753c3 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:08:21 -0400 Subject: powerpc: use subsys_initcall for Freescale Local Bus The FSL_SOC option is bool, and hence this code is either present or absent. It will never be modular, so using module_init as an alias for __initcall is rather misleading. Fix this up now, so that we can relocate module_init from init.h into module.h in the future. 
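For reference while reading these initcall conversions, the levels line up roughly as follows (a sketch; include/linux/init.h in the tree is authoritative, and example_init is a made-up name):

#include <linux/init.h>

/*
 * pure_initcall        0
 * core_initcall        1
 * postcore_initcall    2
 * arch_initcall        3
 * subsys_initcall      4
 * fs_initcall          5
 * device_initcall      6   (what module_init() maps to in built-in code)
 * late_initcall        7
 */
static int __init example_init(void)
{
	return 0;	/* registration work would go here */
}
device_initcall(example_init);	/* states the level explicitly, unlike module_init() */
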
If we don't do this, we'd have to add module.h to obviously non-modular code, and that would be a worse thing. Note that direct use of __initcall is discouraged, vs. one of the priority categorized subgroups. As __initcall gets mapped onto device_initcall, our use of subsys_initcall (which makes sense for bus code) will thus change this registration from level 6-device to level 4-subsys (i.e. slightly earlier). However no observable impact of that small difference has been observed during testing, or is expected. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Scott Wood Acked-by: Scott Wood Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker --- arch/powerpc/sysdev/fsl_lbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index d631022ffb4b..38138cf8d33e 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -407,4 +407,4 @@ static int __init fsl_lbc_init(void) { return platform_driver_register(&fsl_lbc_ctrl_driver); } -module_init(fsl_lbc_init); +subsys_initcall(fsl_lbc_init); -- cgit v1.2.3 From 6f114281c4ad543392f5b7c8345e11e103675cee Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:08:21 -0400 Subject: powerpc: don't use module_init for non-modular core hugetlb code The hugetlbpage.o is obj-y (always built in). It will never be modular, so using module_init as an alias for __initcall is somewhat misleading. Fix this up now, so that we can relocate module_init from init.h into module.h in the future. If we don't do this, we'd have to add module.h to obviously non-modular code, and that would be a worse thing. Note that direct use of __initcall is discouraged, vs. one of the priority categorized subgroups. As __initcall gets mapped onto device_initcall, our use of arch_initcall (which makes sense for arch code) will thus change this registration from level 6-device to level 3-arch (i.e. slightly earlier). However no observable impact of that small difference has been observed during testing, or is expected. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker --- arch/powerpc/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 3385e3d0506e..f1e00ac9283c 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -933,7 +933,7 @@ static int __init hugetlbpage_init(void) return 0; } #endif -module_init(hugetlbpage_init); +arch_initcall(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) { -- cgit v1.2.3 From 3609d819a36c65857816ca1278d80767d6d9b990 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Wed, 20 May 2015 00:30:14 +0530 Subject: powerpc: Make doorbell check preemption safe Doorbell can be used to cause ipi on cpus which are sibling threads on the same core. So icp_native_cause_ipi checks if the destination cpu is a sibling thread of the current cpu and uses doorbell in such cases. But while running with CONFIG_PREEMPT=y, since this section is preemtible, we can run into issues if after we check if the destination cpu is a sibling cpu, the task gets migrated from a sibling cpu to a cpu on another core. Fix this by using get_cpu()/ put_cpu() Signed-off-by: Shreyas B. 
Prabhu Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-native.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 2fc4cf1b7557..eae32654bdf2 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -147,12 +147,16 @@ static void icp_native_cause_ipi(int cpu, unsigned long data) { kvmppc_set_host_ipi(cpu, 1); #ifdef CONFIG_PPC_DOORBELL - if (cpu_has_feature(CPU_FTR_DBELL) && - (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))) - doorbell_cause_ipi(cpu, data); - else + if (cpu_has_feature(CPU_FTR_DBELL)) { + if (cpumask_test_cpu(cpu, cpu_sibling_mask(get_cpu()))) { + doorbell_cause_ipi(cpu, data); + put_cpu(); + return; + } + put_cpu(); + } #endif - icp_native_set_qirr(cpu, IPI_PRIORITY); + icp_native_set_qirr(cpu, IPI_PRIORITY); } /* -- cgit v1.2.3 From 02b6505c8f84cbb4be459c9bf8ebbb7b0754e764 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 17 Jun 2015 11:36:57 +1000 Subject: powerpc/powernv: Increase opal-irqchip initcall priority The eeh subsystem for powernv requires the opal event irqchip to be initialised prior to initialisation or the following errors are produced (and eeh doesn't work as expected): irq: XICS didn't like hwirq-0x9 to VIRQ17 mapping (rc=-22) pnv_eeh_post_init: Can't request OPAL event interrupt (0) On powernv eeh is initialised from a subsys_initcall due to a check for machine_is(powernv) in eeh_init(). This patch increases the initcall priority of opal_event_init() to an arch_initcall to ensure the opal event interface is initialised prior to any users of it. Signed-off-by: Alistair Popple Reported-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-irqchip.c | 4 ++++ arch/powerpc/platforms/powernv/opal.c | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 841135f48981..e2e7d75f52f3 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -231,6 +231,7 @@ out: of_node_put(opal_node); return rc; } +machine_arch_initcall(powernv, opal_event_init); /** * opal_event_request(unsigned int opal_event_nr) - Request an event @@ -244,6 +245,9 @@ out: */ int opal_event_request(unsigned int opal_event_nr) { + if (WARN_ON_ONCE(!opal_event_irqchip.domain)) + return NO_IRQ; + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); } EXPORT_SYMBOL(opal_event_request); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 9e9c483eee4d..f084afa0e3ba 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -657,9 +657,6 @@ static int __init opal_init(void) return -ENODEV; } - /* Initialise OPAL events */ - opal_event_init(); - /* Register OPAL consoles if any ports */ if (firmware_has_feature(FW_FEATURE_OPALv2)) consoles = of_find_node_by_path("/ibm,opal/consoles"); -- cgit v1.2.3 From 7185795a62589292015484985635b4a38029a2b9 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 15 Jun 2015 17:25:34 +0800 Subject: powerpc/powernv: fix construction of opal PRD messages We currently have a bug in the PRD code, where the contents of an incoming message (beyond the header) will be overwritten by the list item 
manipulations when adding to to the prd_msg_queue. This change reorders struct opal_prd_msg_queue_item, so that the message body doesn't overlap the list_head. We also clarify the memcpy of the message, as we're copying unnecessary bytes at the end of the message data. Signed-off-by: Jeremy Kerr Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-prd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index d9e72bde79f5..46cb3feb0a13 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -32,9 +32,13 @@ #include +/** + * The msg member must be at the end of the struct, as it's followed by the + * message data. + */ struct opal_prd_msg_queue_item { - struct opal_prd_msg_header msg; struct list_head list; + struct opal_prd_msg_header msg; }; static struct device_node *prd_node; @@ -351,22 +355,23 @@ static int opal_prd_msg_notifier(struct notifier_block *nb, struct opal_prd_msg_queue_item *item; struct opal_prd_msg_header *hdr; struct opal_msg *msg = _msg; + int msg_size, item_size; unsigned long flags; - int size; if (msg_type != OPAL_MSG_PRD) return 0; - /* Calculate total size of the item we need to store. The 'size' field - * in the header includes the header itself. */ + /* Calculate total size of the message and item we need to store. The + * 'size' field in the header includes the header itself. */ hdr = (void *)msg->params; - size = (sizeof(*item) - sizeof(item->msg)) + be16_to_cpu(hdr->size); + msg_size = be16_to_cpu(hdr->size); + item_size = msg_size + sizeof(*item) - sizeof(item->msg); - item = kzalloc(size, GFP_ATOMIC); + item = kzalloc(item_size, GFP_ATOMIC); if (!item) return -ENOMEM; - memcpy(&item->msg, msg->params, size); + memcpy(&item->msg, msg->params, msg_size); spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); list_add_tail(&item->list, &opal_prd_msg_queue); -- cgit v1.2.3 From cdf2bc1c283066d6f0834cc5dd4fcf5d01e44af9 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Wed, 17 Jun 2015 08:35:39 +0800 Subject: powerpc/include: Add opal-prd to installed uapi headers We'll want to build the opal-prd daemon with the prd headers, so include this in the uapi headers list. Signed-off-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/include/uapi/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 79c4068be278..f44a027818af 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -18,6 +18,7 @@ header-y += kvm_para.h header-y += mman.h header-y += msgbuf.h header-y += nvram.h +header-y += opal-prd.h header-y += param.h header-y += perf_event.h header-y += poll.h -- cgit v1.2.3 From b5926430dfa07d17e5d768c16b0d81c13a793f7c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 15 Jun 2015 17:49:59 +1000 Subject: powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off The pnv_pci_ioda2_unset_window() function is used to do the final cleanup of a DMA window being released: - via VFIO ioctl by the guest request; - via unplugging a virtual PCI function. However the function was under #ifdef CONFIG_IOMMU_API and was missing. This moves the helper outside of IOMMU_API block and enables it for either or both IOMMU_API and PCI_IOV. 
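The build fix boils down to guarding the shared helper with both options; schematically (not the real driver code):

/* A cleanup helper called from both the VFIO (CONFIG_IOMMU_API) path and
 * the SR-IOV unplug (CONFIG_PCI_IOV) path must be compiled in when either
 * option is enabled, not only when the first one is. */
#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
static long dma_window_cleanup(void)
{
	/* unmap the DMA window and unlink its table here */
	return 0;
}
#endif
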
Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 50 ++++++++++++++++--------------- 1 file changed, 26 insertions(+), 24 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b9f0f430e249..8424f5cdf0b1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2116,6 +2116,32 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) return 0; } +#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV) +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + long ret; + + pe_info(pe, "Removing DMA window #%d\n", num); + + ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + (pe->pe_number << 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); + else + pnv_pci_ioda2_tce_invalidate_entire(pe); + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + + return ret; +} +#endif + #ifdef CONFIG_IOMMU_API static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels) @@ -2149,30 +2175,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, return bytes; } -static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, - int num) -{ - struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, - table_group); - struct pnv_phb *phb = pe->phb; - long ret; - - pe_info(pe, "Removing DMA window #%d\n", num); - - ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, - (pe->pe_number << 1) + num, - 0/* levels */, 0/* table address */, - 0/* table size */, 0/* page size */); - if (ret) - pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); - else - pnv_pci_ioda2_tce_invalidate_entire(pe); - - pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); - - return ret; -} - static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) { struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, -- cgit v1.2.3 From b4b56f9ecab40f3b4ef53e130c9f6663be491894 Mon Sep 17 00:00:00 2001 From: Sam bobroff Date: Fri, 12 Jun 2015 11:06:32 +1000 Subject: powerpc/tm: Abort syscalls in active transactions This patch changes the syscall handler to doom (tabort) active transactions when a syscall is made and return very early without performing the syscall and keeping side effects to a minimum (no CPU accounting or system call tracing is performed). Also included is a new HWCAP2 bit, PPC_FEATURE2_HTM_NOSC, to indicate this behaviour to userspace. Currently, the system call instruction automatically suspends an active transaction which causes side effects to persist when an active transaction fails. This does change the kernel's behaviour, but in a way that was documented as unsupported. It doesn't reduce functionality as syscalls will still be performed after tsuspend; it just requires that the transaction be explicitly suspended. It also provides a consistent interface and makes the behaviour of user code substantially the same across powerpc and platforms that do not support suspended transactions (e.g. x86 and s390). 
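User space can detect the new behaviour through the HWCAP2 bit mentioned above. A minimal sketch, assuming glibc's getauxval() and the PPC_FEATURE2_HTM_NOSC value from the uapi header change further down:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_HTM_NOSC
#define PPC_FEATURE2_HTM_NOSC	0x01000000
#endif

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	if (hwcap2 & PPC_FEATURE2_HTM_NOSC)
		printf("syscalls in active transactions are doomed, not performed\n");
	else
		printf("older behaviour: the syscall merely suspends the transaction\n");
	return 0;
}
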
Performance measurements using http://ozlabs.org/~anton/junkcode/null_syscall.c indicate the cost of a normal (non-aborted) system call increases by about 0.25%. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- Documentation/powerpc/transactional_memory.txt | 32 +++++++++++----------- arch/powerpc/include/asm/cputable.h | 10 ++++--- arch/powerpc/include/uapi/asm/cputable.h | 1 + arch/powerpc/include/uapi/asm/tm.h | 2 +- arch/powerpc/kernel/cputable.c | 4 ++- arch/powerpc/kernel/entry_64.S | 35 +++++++++++++++++++++++++ tools/testing/selftests/powerpc/tm/Makefile | 4 +-- tools/testing/selftests/powerpc/tm/tm-syscall.c | 3 ++- 8 files changed, 66 insertions(+), 25 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index ded69794a5c0..ba0a2a4a54ba 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -74,22 +74,23 @@ Causes of transaction aborts Syscalls ======== -Performing syscalls from within transaction is not recommended, and can lead -to unpredictable results. +Syscalls made from within an active transaction will not be performed and the +transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL +| TM_CAUSE_PERSISTENT. -Syscalls do not by design abort transactions, but beware: The kernel code will -not be running in transactional state. The effect of syscalls will always -remain visible, but depending on the call they may abort your transaction as a -side-effect, read soon-to-be-aborted transactional data that should not remain -invisible, etc. If you constantly retry a transaction that constantly aborts -itself by calling a syscall, you'll have a livelock & make no progress. +Syscalls made from within a suspended transaction are performed as normal and +the transaction is not explicitly doomed by the kernel. However, what the +kernel does to perform the syscall may result in the transaction being doomed +by the hardware. The syscall is performed in suspended mode so any side +effects will be persistent, independent of transaction success or failure. No +guarantees are provided by the kernel about which syscalls will affect +transaction success. -Simple syscalls (e.g. sigprocmask()) "could" be OK. Even things like write() -from, say, printf() should be OK as long as the kernel does not access any -memory that was accessed transactionally. - -Consider any syscalls that happen to work as debug-only -- not recommended for -production use. Best to queue them up till after the transaction is over. +Care must be taken when relying on syscalls to abort during active transactions +if the calls are made via a library. Libraries may cache values (which may +give the appearance of success) or perform operations that cause transaction +failure before entering the kernel (which may produce different failure codes). +Examples are glibc's getpid() and lazy symbol resolution. Signals @@ -176,8 +177,7 @@ kernel aborted a transaction: TM_CAUSE_RESCHED Thread was rescheduled. TM_CAUSE_TLBI Software TLB invalid. TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. - TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort - transactions for consistency will use this. + TM_CAUSE_SYSCALL Syscall from active transaction. TM_CAUSE_SIGNAL Signal delivered. TM_CAUSE_MISC Currently unused. TM_CAUSE_ALIGNMENT Alignment fault. 
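To observe the failure code from user space, something along these lines can be used (a sketch only: the __builtin_tbegin/__builtin_get_texasru builtins and the TEXASR layout are assumptions based on GCC's HTM support with -mhtm, they are not part of this patch):

#include <stdio.h>
#include <unistd.h>

#define TM_CAUSE_SYSCALL	0xd8	/* matches the uapi tm.h change below */

static unsigned long failure_code(void)
{
	/* the abort cause lives in the top byte of TEXASR */
	return __builtin_get_texasru() >> 24;
}

int main(void)
{
	if (__builtin_tbegin(0)) {
		/* active transaction: on a kernel with this patch the syscall
		 * is not performed and the transaction is doomed instead */
		getppid();
		__builtin_tend(0);
		puts("transaction completed (pre-patch kernel behaviour)");
	} else {
		/* expect TM_CAUSE_SYSCALL with the persistent bit set */
		printf("transaction aborted, cause 0x%lx\n", failure_code());
	}
	return 0;
}
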
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 6367b8347dad..4994648b9265 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -242,11 +242,13 @@ enum { /* We only set the TM feature if the kernel was compiled with TM supprt */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -#define CPU_FTR_TM_COMP CPU_FTR_TM -#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM +#define CPU_FTR_TM_COMP CPU_FTR_TM +#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM +#define PPC_FEATURE2_HTM_NOSC_COMP PPC_FEATURE2_HTM_NOSC #else -#define CPU_FTR_TM_COMP 0 -#define PPC_FEATURE2_HTM_COMP 0 +#define CPU_FTR_TM_COMP 0 +#define PPC_FEATURE2_HTM_COMP 0 +#define PPC_FEATURE2_HTM_NOSC_COMP 0 #endif /* We need to mark all pages as being coherent if we're SMP or we have a diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index de2c0e4ee1aa..43686043e297 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -42,5 +42,6 @@ #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 #define PPC_FEATURE2_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h index 5d836b7c1176..5047659815a5 100644 --- a/arch/powerpc/include/uapi/asm/tm.h +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -11,7 +11,7 @@ #define TM_CAUSE_RESCHED 0xde #define TM_CAUSE_TLBI 0xdc #define TM_CAUSE_FAC_UNAV 0xda -#define TM_CAUSE_SYSCALL 0xd8 /* future use */ +#define TM_CAUSE_SYSCALL 0xd8 #define TM_CAUSE_MISC 0xd6 /* future use */ #define TM_CAUSE_SIGNAL 0xd4 #define TM_CAUSE_ALIGNMENT 0xd2 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 60262fdf35ba..7d80bfdfb15e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -108,7 +108,9 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ - PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_HTM_COMP | \ + PPC_FEATURE2_HTM_NOSC_COMP | \ + PPC_FEATURE2_DSCR | \ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 278888e89acc..579e0f9a2d57 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -34,6 +34,7 @@ #include #include #include +#include /* * System calls. @@ -51,6 +52,12 @@ exception_marker: .globl system_call_common system_call_common: +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne tabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif andi. r10,r12,MSR_PR mr r10,r1 addi r1,r1,-INT_FRAME_SIZE @@ -311,6 +318,34 @@ syscall_exit_work: bl do_syscall_trace_leave b ret_from_except +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tabort_syscall: + /* Firstly we need to enable TM in the kernel */ + mfmsr r10 + li r13, 1 + rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r10, 0 + + /* tabort, this dooms the transaction, nothing else */ + li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R13) + + /* + * Return directly to userspace. We have corrupted user register state, + * but userspace will never see that register state. 
Execution will + * resume after the tbegin of the aborted transaction with the + * checkpointed register state. + */ + li r13, MSR_RI + andc r10, r10, r13 + mtmsrd r10, 1 + mtspr SPRN_SRR0, r11 + mtspr SPRN_SRR1, r12 + + rfid + b . /* prevent speculative execution */ +#endif + /* Save non-volatile GPRs, if not already saved. */ _GLOBAL(save_nvgprs) ld r11,_TRAP(r1) diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 6bff955e1d55..4bea62a319dc 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,11 +1,11 @@ -TEST_PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr tm-syscall all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c tm-syscall: tm-syscall-asm.S -tm-syscall: CFLAGS += -mhtm +tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index 3ed8d4b252fa..1276e23da63b 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -82,7 +82,8 @@ int tm_syscall(void) unsigned count = 0; struct timeval end, now; - SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM)); + SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) + & PPC_FEATURE2_HTM_NOSC)); setbuf(stdout, NULL); printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); -- cgit v1.2.3 From 02505cebc1db06707d31635e9c9b342d58f887b7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 17 Jun 2015 08:13:40 +0530 Subject: powerpc/mm: PTE_RPN_MAX is not used, remove the same Remove the unused #define Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pte-common.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index c5a755ef7011..b7c8d079c121 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -85,10 +85,8 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); * 64-bit PTEs */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) -#define PTE_RPN_MAX (1ULL << (64 - PTE_RPN_SHIFT)) #define PTE_RPN_MASK (~((1ULL< Date: Wed, 17 Jun 2015 08:13:41 +0530 Subject: powerpc/mm: Change the swap encoding in pte. Current swap encoding in pte can't support large pfns above 4TB. Change the swap encoding such that we put the swap type in the PTE bits. Also add build checks to make sure we don't overlap with HPTEFLAGS. 
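Schematically, the new layout packs the type into low PTE bits and pushes the offset above the RPN shift, so a large offset can no longer collide with the type field. A user-space sketch with made-up shift values (the real PTE_RPN_SHIFT depends on the page-size configuration):

#include <assert.h>

#define PAGE_BIT_SWAP_TYPE	2
#define SWP_TYPE_BITS		5
#define PTE_RPN_SHIFT		12	/* placeholder value for illustration */

static unsigned long mk_swp_entry(unsigned long type, unsigned long offset)
{
	return (type << PAGE_BIT_SWAP_TYPE) | (offset << PTE_RPN_SHIFT);
}

static unsigned long swp_type(unsigned long val)
{
	return (val >> PAGE_BIT_SWAP_TYPE) & ((1UL << SWP_TYPE_BITS) - 1);
}

static unsigned long swp_offset(unsigned long val)
{
	return val >> PTE_RPN_SHIFT;
}

int main(void)
{
	unsigned long e = mk_swp_entry(3, 0x123456789UL);	/* offset well past what the old scheme could hold */

	assert(swp_type(e) == 3);
	assert(swp_offset(e) == 0x123456789UL);
	return 0;
}
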
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pgtable-ppc64.h | 26 +++++++++++++++++++++----- arch/powerpc/include/asm/pte-book3e.h | 1 + arch/powerpc/include/asm/pte-hash64.h | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index f951d9cf358a..f890f7ce1593 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -347,11 +347,27 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) /* Encode and de-code a swap entry */ -#define __swp_type(entry) (((entry).val >> 1) & 0x3f) -#define __swp_offset(entry) ((entry).val >> 8) -#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)}) -#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT}) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT }) +#define MAX_SWAPFILES_CHECK() do { \ + BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ + /* \ + * Don't have overlapping bits with _PAGE_HPTEFLAGS \ + * We filter HPTEFLAGS on set_pte. \ + */ \ + BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ + } while (0) +/* + * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; + */ +#define SWP_TYPE_BITS 5 +#define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ + & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << _PAGE_BIT_SWAP_TYPE) \ + | ((offset) << PTE_RPN_SHIFT) }) + +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) +#define __swp_entry_to_pte(x) __pte((x).val) void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h index 91a704952ca1..8d8473278d91 100644 --- a/arch/powerpc/include/asm/pte-book3e.h +++ b/arch/powerpc/include/asm/pte-book3e.h @@ -11,6 +11,7 @@ /* Architected bits */ #define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */ #define _PAGE_SW1 0x000002 +#define _PAGE_BIT_SWAP_TYPE 2 #define _PAGE_BAP_SR 0x000004 #define _PAGE_BAP_UR 0x000008 #define _PAGE_BAP_SW 0x000010 diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h index fc852f7e7b3a..ef612c160da7 100644 --- a/arch/powerpc/include/asm/pte-hash64.h +++ b/arch/powerpc/include/asm/pte-hash64.h @@ -16,6 +16,7 @@ */ #define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ #define _PAGE_USER 0x0002 /* matches one of the PP bits */ +#define _PAGE_BIT_SWAP_TYPE 2 #define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */ #define _PAGE_GUARDED 0x0008 /* We can derive Memory coherence from _PAGE_NO_CACHE */ -- cgit v1.2.3 From 5c89a87d13d168eacb8110810544a98ce0f4319d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 18 Jun 2015 11:41:36 +1000 Subject: powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma() When pnv_pci_ioda_fixup() is called during PHB fixup time, each PE in the sorted list of PEs (phb::pe_dma_list) is iterated to setup the PE's DMA32 space by pnv_ioda_setup_bus_dma() if the PE's DMA32 weight is bigger than zero. The function also assigns all the subordinate PCI devices of the PE's primary bus with the PE's DMA32 IOMMU table. 
It causes the PCI devicess in the child PEs, which don't have DMA weight, receives wrong IOMMU table and then IOMMU group. The patch fixes above issue by more check on the PE's coverage and don't assign IOMMU table to those PCI devices, which belong to the child PEs. The problem was found on Firestone platform initially. Suggested-by: Gavin Shan Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8424f5cdf0b1..5738d315248b 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1661,7 +1661,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); iommu_add_device(&dev->dev); - if (dev->subordinate) + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_bus_dma(pe, dev->subordinate); } } -- cgit v1.2.3 From dc3f4198eac14e52a98dfc79cd84b45e280f59cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 May 2015 10:10:34 -0400 Subject: make simple_positive() public Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/s390/hypfs/inode.c | 7 +------ drivers/block/drbd/drbd_debugfs.c | 10 +--------- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- fs/autofs4/autofs_i.h | 5 ----- fs/ceph/dir.c | 2 +- fs/configfs/inode.c | 2 +- fs/debugfs/inode.c | 11 +++-------- fs/libfs.c | 5 ----- fs/nfs/dir.c | 2 +- fs/tracefs/inode.c | 11 +++-------- include/linux/dcache.h | 5 +++++ security/inode.c | 19 ++++++------------- 14 files changed, 25 insertions(+), 60 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1ba6307be4db..11634fa7ab3c 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -166,7 +166,7 @@ static void spufs_prune_dir(struct dentry *dir) mutex_lock(&d_inode(dir)->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); - if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index d3f896a35b98..8ffad5437232 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -62,18 +62,13 @@ static void hypfs_add_dentry(struct dentry *dentry) hypfs_last_dentry = dentry; } -static inline int hypfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static void hypfs_remove(struct dentry *dentry) { struct dentry *parent; parent = dentry->d_parent; mutex_lock(&d_inode(parent)->i_mutex); - if (hypfs_positive(dentry)) { + if (simple_positive(dentry)) { if (d_is_dir(dentry)) simple_rmdir(d_inode(parent), dentry); else diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c index a6ee3d750c30..6b88a35fb048 100644 --- a/drivers/block/drbd/drbd_debugfs.c +++ b/drivers/block/drbd/drbd_debugfs.c @@ -419,14 +419,6 @@ static int in_flight_summary_show(struct seq_file *m, void *pos) return 0; } -/* simple_positive(file->f_path.dentry) respectively debugfs_positive(), - * but neither is "reachable" from here. 
- * So we have our own inline version of it above. :-( */ -static inline int debugfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - /* make sure at *open* time that the respective object won't go away. */ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *), void *data, struct kref *kref, @@ -444,7 +436,7 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo /* serialize with d_delete() */ mutex_lock(&d_inode(parent)->i_mutex); /* Make sure the object is still alive */ - if (debugfs_positive(file->f_path.dentry) + if (simple_positive(file->f_path.dentry) && kref_get_unless_zero(kref)) ret = 0; mutex_unlock(&d_inode(parent)->i_mutex); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 1ca8e32a9592..25422a3a7238 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,7 +277,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index bdd5d3857203..13ef22bd9459 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) } spin_lock(&tmp->d_lock); - if (!d_unhashed(tmp) && d_really_is_positive(tmp)) { + if (simple_positive(tmp)) { __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(d_inode(parent), tmp); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 5b700ef1e59d..c37149b929be 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -238,11 +238,6 @@ static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi) return d_inode(sbi->sb->s_root)->i_ino; } -static inline int simple_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static inline void __autofs4_add_expiring(struct dentry *dentry) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 4248307fea90..edbb8da02a6a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -161,7 +161,7 @@ more: } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (di->lease_shared_gen == shared_gen && - !d_unhashed(dentry) && d_really_is_positive(dentry) && + simple_positive(dentry) && ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR && ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH && fpos_cmp(ctx->pos, di->offset) <= 0) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 8d89f5fd0331..eae87575e681 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -236,7 +236,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) if (dentry) { spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry) && d_really_is_positive(dentry)) { + if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 7eaec88ea970..ef86ad6bdc3e 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -44,11 +44,6 @@ static struct inode *debugfs_get_inode(struct super_block *sb) return inode; } -static inline int debugfs_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - struct debugfs_mount_opts { 
kuid_t uid; kgid_t gid; @@ -522,7 +517,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) { int ret = 0; - if (debugfs_positive(dentry)) { + if (simple_positive(dentry)) { dget(dentry); if (d_is_dir(dentry)) ret = simple_rmdir(d_inode(parent), dentry); @@ -602,7 +597,7 @@ void debugfs_remove_recursive(struct dentry *dentry) */ spin_lock(&parent->d_lock); list_for_each_entry(child, &parent->d_subdirs, d_child) { - if (!debugfs_positive(child)) + if (!simple_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ @@ -623,7 +618,7 @@ void debugfs_remove_recursive(struct dentry *dentry) * from d_subdirs. When releasing the parent->d_lock we can * no longer trust that the next pointer is valid. * Restart the loop. We'll skip this one with the - * debugfs_positive() check. + * simple_positive() check. */ goto loop; } diff --git a/fs/libfs.c b/fs/libfs.c index 65e1feca8b98..4d9e6c118fe1 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,11 +20,6 @@ #include "internal.h" -static inline int simple_positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b2c8b31b2be7..b9108f4254a7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1771,7 +1771,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir); static void nfs_dentry_handle_enoent(struct dentry *dentry) { - if (d_really_is_positive(dentry) && !d_unhashed(dentry)) + if (simple_positive(dentry)) d_delete(dentry); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d92bdf3b079a..6e8a1400d662 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -496,16 +496,11 @@ struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *pare return dentry; } -static inline int tracefs_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) { int ret = 0; - if (tracefs_positive(dentry)) { + if (simple_positive(dentry)) { if (dentry->d_inode) { dget(dentry); switch (dentry->d_inode->i_mode & S_IFMT) { @@ -582,7 +577,7 @@ void tracefs_remove_recursive(struct dentry *dentry) */ spin_lock(&parent->d_lock); list_for_each_entry(child, &parent->d_subdirs, d_child) { - if (!tracefs_positive(child)) + if (!simple_positive(child)) continue; /* perhaps simple_empty(child) makes more sense */ @@ -603,7 +598,7 @@ void tracefs_remove_recursive(struct dentry *dentry) * from d_subdirs. When releasing the parent->d_lock we can * no longer trust that the next pointer is valid. * Restart the loop. We'll skip this one with the - * tracefs_positive() check. + * simple_positive() check. 
*/ goto loop; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 167ec0934049..d2d50249b7b2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -507,6 +507,11 @@ static inline bool d_really_is_positive(const struct dentry *dentry) return dentry->d_inode != NULL; } +static inline int simple_positive(struct dentry *dentry) +{ + return d_really_is_positive(dentry) && !d_unhashed(dentry); +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) diff --git a/security/inode.c b/security/inode.c index 91503b79c5f8..6df0d8dae1e0 100644 --- a/security/inode.c +++ b/security/inode.c @@ -25,11 +25,6 @@ static struct vfsmount *mount; static int mount_count; -static inline int positive(struct dentry *dentry) -{ - return d_really_is_positive(dentry) && !d_unhashed(dentry); -} - static int fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr files[] = {{""}}; @@ -201,14 +196,12 @@ void securityfs_remove(struct dentry *dentry) return; mutex_lock(&d_inode(parent)->i_mutex); - if (positive(dentry)) { - if (d_really_is_positive(dentry)) { - if (d_is_dir(dentry)) - simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); - dput(dentry); - } + if (simple_positive(dentry)) { + if (d_is_dir(dentry)) + simple_rmdir(d_inode(parent), dentry); + else + simple_unlink(d_inode(parent), dentry); + dput(dentry); } mutex_unlock(&d_inode(parent)->i_mutex); simple_release_fs(&mount, &mount_count); -- cgit v1.2.3 From 5935877af47653d737d2401f9b6200f1fb4a03eb Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 24 Jun 2015 16:54:43 -0700 Subject: powerpc: use for_each_sg() This replaces the plain loop over the sglist array with for_each_sg() macro which consists of sg_next() function calls. Since powerpc does select ARCH_HAS_SG_CHAIN, it is necessary to use for_each_sg() in order to loop over each sg element. This also help find problems with drivers that do not properly initialize their sg tables when CONFIG_DEBUG_SG is enabled. 
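For reference, for_each_sg() is essentially the loop below (simplified from include/linux/scatterlist.h); sg_next() follows a chain link when it reaches a chain entry, which a plain sgl++ over the array cannot do:

#define for_each_sg(sglist, sg, nr, __i) \
	for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))

/* typical use (kernel context, illustrative):
 *
 *	struct scatterlist *sg;
 *	size_t total = 0;
 *	int i;
 *
 *	for_each_sg(sglist, sg, nelems, i)
 *		total += sg->length;	safe even when the table is chained
 */
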
Signed-off-by: Akinobu Mita Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index b41426c60ef6..5f8dcdaa2820 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -557,11 +557,11 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, struct vio_dev *viodev = to_vio_dev(dev); struct iommu_table *tbl; struct scatterlist *sgl; - int ret, count = 0; + int ret, count; size_t alloc_size = 0; tbl = get_iommu_table_base(dev); - for (sgl = sglist; count < nelems; count++, sgl++) + for_each_sg(sglist, sgl, nelems, count) alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); if (vio_cmo_alloc(viodev, alloc_size)) { @@ -577,7 +577,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, return ret; } - for (sgl = sglist, count = 0; count < ret; count++, sgl++) + for_each_sg(sglist, sgl, ret, count) alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); if (alloc_size) vio_cmo_dealloc(viodev, alloc_size); @@ -594,10 +594,10 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, struct iommu_table *tbl; struct scatterlist *sgl; size_t alloc_size = 0; - int count = 0; + int count; tbl = get_iommu_table_base(dev); - for (sgl = sglist; count < nelems; count++, sgl++) + for_each_sg(sglist, sgl, nelems, count) alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); -- cgit v1.2.3 From e81f2d22370f8231cb7f13f454bcc8c0eb4e23f2 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Wed, 24 Jun 2015 16:56:13 -0700 Subject: mm/hugetlb: reduce arch dependent code about huge_pmd_unshare Currently we have many duplicates in definitions of huge_pmd_unshare. In all architectures this function just returns 0 when CONFIG_ARCH_WANT_HUGE_PMD_SHARE is N. This patch puts the default implementation in mm/hugetlb.c and lets these architectures use the common code. 
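The shape of the consolidation (a sketch mirroring the mm/hugetlb.c hunk shown further down): the stub joins the existing !CONFIG_ARCH_WANT_HUGE_PMD_SHARE block in generic code, so each architecture's private copy can simply be deleted:

#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
/* the real, sharing-aware implementations live here */
#else
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
	return NULL;
}

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;	/* nothing is ever shared, so there is nothing to unshare */
}
#define want_pmd_share()	(0)
#endif
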
Signed-off-by: Zhang Zhen Cc: Russell King Cc: Catalin Marinas Cc: Tony Luck Cc: James Hogan Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Chris Metcalf Cc: David Rientjes Cc: James Yang Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/hugetlbpage.c | 5 ----- arch/arm64/mm/hugetlbpage.c | 7 ------- arch/ia64/mm/hugetlbpage.c | 5 ----- arch/metag/mm/hugetlbpage.c | 5 ----- arch/mips/mm/hugetlbpage.c | 5 ----- arch/powerpc/mm/hugetlbpage.c | 5 ----- arch/s390/mm/hugetlbpage.c | 5 ----- arch/sh/mm/hugetlbpage.c | 5 ----- arch/sparc/mm/hugetlbpage.c | 5 ----- arch/tile/mm/hugetlbpage.c | 5 ----- mm/hugetlb.c | 5 +++++ 11 files changed, 5 insertions(+), 52 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index c72412415093..fcafb521f14e 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -41,11 +41,6 @@ int pud_huge(pud_t pud) return 0; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2de9d2e59d96..cccc4af87a03 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -31,13 +31,6 @@ #include #include -#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} -#endif - int pmd_huge(pmd_t pmd) { return !(pmd_val(pmd) & PMD_TABLE_BIT); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 52b7604b5215..f50d4b3f501a 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -65,11 +65,6 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) return pte; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } /* diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 7ca80ac42ed5..53f0f6c47027 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -89,11 +89,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { return pmd_page_shift(pmd) > PAGE_SHIFT; diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 06e0f421b41b..74aa6f62468f 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -51,11 +51,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return (pte_t *) pmd; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - /* * This function checks for proper alignment of input addr and len parameters. 
*/ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 3385e3d0506e..38bd5d998c81 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -439,11 +439,6 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - #ifdef CONFIG_PPC_FSL_BOOK3E #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e617e74b7be2..c3f8e3df92ff 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -193,11 +193,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return (pte_t *) pmdp; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { if (!MACHINE_HAS_HPAGE) diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 534bc978af8a..6385f60209b6 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -62,11 +62,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { return 0; diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 4242eab12e10..131eaf4ad7f5 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -172,11 +172,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 8416240c322c..c034dc3fe2d4 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -160,11 +160,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_HUGE_PAGE); } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, unsigned long len, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 271e4432734c..716465ae57aa 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3789,6 +3789,11 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) { return NULL; } + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} #define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ -- cgit v1.2.3 From 2ae416b142b625c58c9ccb039aa3ef48ad0e9bae Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 24 Jun 2015 16:56:16 -0700 Subject: mm: new mm hook framework CRIU is recreating the process memory layout by remapping the checkpointee memory area on top of the current process (criu). This includes remapping the vDSO to the place it has at checkpoint time. However some architectures like powerpc are keeping a reference to the vDSO base address to build the signal return stack frame by calling the vDSO sigreturn service. So once the vDSO has been moved, this reference is no more valid and the signal frame built later are not usable. This patch serie is introducing a new mm hook framework, and a new arch_remap hook which is called when mremap is done and the mm lock still hold. 
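The override pattern the framework is built around looks roughly like this (a sketch only; arch_remap and its signature are assumptions for illustration, the hook itself arrives in a later patch of the series):

/* generic side, e.g. include/linux/mm-arch-hooks.h (sketched): pull in the
 * per-architecture header first, then supply an empty default for any hook
 * the architecture did not define. */
#include <asm/mm-arch-hooks.h>

#ifndef arch_remap
static inline void arch_remap(struct mm_struct *mm,
			      unsigned long old_start, unsigned long old_end,
			      unsigned long new_start, unsigned long new_end)
{
}
#define arch_remap arch_remap
#endif

/* an architecture that cares (powerpc, for the vDSO case) defines both the
 * function and the macro in its asm/mm-arch-hooks.h and the default above
 * drops out */
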
The next patch is adding the vDSO remap and unmap tracking to the powerpc architecture. This patch (of 3): This patch introduces a new set of header file to manage mm hooks: - per architecture empty header file (arch/x/include/asm/mm-arch-hooks.h) - a generic header (include/linux/mm-arch-hooks.h) The architecture which need to overwrite a hook as to redefine it in its header file, while architecture which doesn't need have nothing to do. The default hooks are defined in the generic header and are used in the case the architecture is not defining it. In a next step, mm hooks defined in include/asm-generic/mm_hooks.h should be moved here. Signed-off-by: Laurent Dufour Suggested-by: Andrew Morton Cc: "Kirill A. Shutemov" Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/arc/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/arm/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/arm64/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/avr32/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/blackfin/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/c6x/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/cris/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/frv/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/hexagon/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/ia64/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/m32r/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/m68k/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/metag/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/microblaze/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/mips/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/mn10300/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/nios2/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/openrisc/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/parisc/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/powerpc/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/s390/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/score/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/sh/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/sparc/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/tile/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/um/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/unicore32/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/x86/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ arch/xtensa/include/asm/mm-arch-hooks.h | 15 +++++++++++++++ include/linux/mm-arch-hooks.h | 16 ++++++++++++++++ 31 files changed, 466 insertions(+) create mode 100644 arch/alpha/include/asm/mm-arch-hooks.h create mode 100644 arch/arc/include/asm/mm-arch-hooks.h create mode 100644 arch/arm/include/asm/mm-arch-hooks.h create mode 100644 arch/arm64/include/asm/mm-arch-hooks.h create mode 100644 arch/avr32/include/asm/mm-arch-hooks.h create mode 100644 arch/blackfin/include/asm/mm-arch-hooks.h create mode 100644 arch/c6x/include/asm/mm-arch-hooks.h create mode 100644 arch/cris/include/asm/mm-arch-hooks.h create mode 100644 arch/frv/include/asm/mm-arch-hooks.h create mode 100644 arch/hexagon/include/asm/mm-arch-hooks.h create mode 100644 arch/ia64/include/asm/mm-arch-hooks.h create mode 100644 arch/m32r/include/asm/mm-arch-hooks.h create mode 100644 
arch/m68k/include/asm/mm-arch-hooks.h create mode 100644 arch/metag/include/asm/mm-arch-hooks.h create mode 100644 arch/microblaze/include/asm/mm-arch-hooks.h create mode 100644 arch/mips/include/asm/mm-arch-hooks.h create mode 100644 arch/mn10300/include/asm/mm-arch-hooks.h create mode 100644 arch/nios2/include/asm/mm-arch-hooks.h create mode 100644 arch/openrisc/include/asm/mm-arch-hooks.h create mode 100644 arch/parisc/include/asm/mm-arch-hooks.h create mode 100644 arch/powerpc/include/asm/mm-arch-hooks.h create mode 100644 arch/s390/include/asm/mm-arch-hooks.h create mode 100644 arch/score/include/asm/mm-arch-hooks.h create mode 100644 arch/sh/include/asm/mm-arch-hooks.h create mode 100644 arch/sparc/include/asm/mm-arch-hooks.h create mode 100644 arch/tile/include/asm/mm-arch-hooks.h create mode 100644 arch/um/include/asm/mm-arch-hooks.h create mode 100644 arch/unicore32/include/asm/mm-arch-hooks.h create mode 100644 arch/x86/include/asm/mm-arch-hooks.h create mode 100644 arch/xtensa/include/asm/mm-arch-hooks.h create mode 100644 include/linux/mm-arch-hooks.h (limited to 'arch/powerpc') diff --git a/arch/alpha/include/asm/mm-arch-hooks.h b/arch/alpha/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..b07fd862fec3 --- /dev/null +++ b/arch/alpha/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ALPHA_MM_ARCH_HOOKS_H +#define _ASM_ALPHA_MM_ARCH_HOOKS_H + +#endif /* _ASM_ALPHA_MM_ARCH_HOOKS_H */ diff --git a/arch/arc/include/asm/mm-arch-hooks.h b/arch/arc/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..c37541c5f8ba --- /dev/null +++ b/arch/arc/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARC_MM_ARCH_HOOKS_H +#define _ASM_ARC_MM_ARCH_HOOKS_H + +#endif /* _ASM_ARC_MM_ARCH_HOOKS_H */ diff --git a/arch/arm/include/asm/mm-arch-hooks.h b/arch/arm/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..7056660c7cc4 --- /dev/null +++ b/arch/arm/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_ARM_MM_ARCH_HOOKS_H +#define _ASM_ARM_MM_ARCH_HOOKS_H + +#endif /* _ASM_ARM_MM_ARCH_HOOKS_H */ diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..562b655f5ba9 --- /dev/null +++ b/arch/arm64/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_ARM64_MM_ARCH_HOOKS_H +#define _ASM_ARM64_MM_ARCH_HOOKS_H + +#endif /* _ASM_ARM64_MM_ARCH_HOOKS_H */ diff --git a/arch/avr32/include/asm/mm-arch-hooks.h b/arch/avr32/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..145452ffbdad --- /dev/null +++ b/arch/avr32/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_AVR32_MM_ARCH_HOOKS_H +#define _ASM_AVR32_MM_ARCH_HOOKS_H + +#endif /* _ASM_AVR32_MM_ARCH_HOOKS_H */ diff --git a/arch/blackfin/include/asm/mm-arch-hooks.h b/arch/blackfin/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..1c5211ec338f --- /dev/null +++ b/arch/blackfin/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_BLACKFIN_MM_ARCH_HOOKS_H +#define _ASM_BLACKFIN_MM_ARCH_HOOKS_H + +#endif /* _ASM_BLACKFIN_MM_ARCH_HOOKS_H */ diff --git a/arch/c6x/include/asm/mm-arch-hooks.h b/arch/c6x/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..bb3c4a6ce8e9 --- /dev/null +++ b/arch/c6x/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_C6X_MM_ARCH_HOOKS_H +#define _ASM_C6X_MM_ARCH_HOOKS_H + +#endif /* _ASM_C6X_MM_ARCH_HOOKS_H */ diff --git a/arch/cris/include/asm/mm-arch-hooks.h b/arch/cris/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..314f774db2b0 --- /dev/null +++ b/arch/cris/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_CRIS_MM_ARCH_HOOKS_H +#define _ASM_CRIS_MM_ARCH_HOOKS_H + +#endif /* _ASM_CRIS_MM_ARCH_HOOKS_H */ diff --git a/arch/frv/include/asm/mm-arch-hooks.h b/arch/frv/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..51d13a870404 --- /dev/null +++ b/arch/frv/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_FRV_MM_ARCH_HOOKS_H +#define _ASM_FRV_MM_ARCH_HOOKS_H + +#endif /* _ASM_FRV_MM_ARCH_HOOKS_H */ diff --git a/arch/hexagon/include/asm/mm-arch-hooks.h b/arch/hexagon/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..05e8b939e416 --- /dev/null +++ b/arch/hexagon/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_HEXAGON_MM_ARCH_HOOKS_H +#define _ASM_HEXAGON_MM_ARCH_HOOKS_H + +#endif /* _ASM_HEXAGON_MM_ARCH_HOOKS_H */ diff --git a/arch/ia64/include/asm/mm-arch-hooks.h b/arch/ia64/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..ab4b5c698322 --- /dev/null +++ b/arch/ia64/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_IA64_MM_ARCH_HOOKS_H +#define _ASM_IA64_MM_ARCH_HOOKS_H + +#endif /* _ASM_IA64_MM_ARCH_HOOKS_H */ diff --git a/arch/m32r/include/asm/mm-arch-hooks.h b/arch/m32r/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..6d60b4750f41 --- /dev/null +++ b/arch/m32r/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_M32R_MM_ARCH_HOOKS_H +#define _ASM_M32R_MM_ARCH_HOOKS_H + +#endif /* _ASM_M32R_MM_ARCH_HOOKS_H */ diff --git a/arch/m68k/include/asm/mm-arch-hooks.h b/arch/m68k/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..7e8709bc90ae --- /dev/null +++ b/arch/m68k/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_M68K_MM_ARCH_HOOKS_H +#define _ASM_M68K_MM_ARCH_HOOKS_H + +#endif /* _ASM_M68K_MM_ARCH_HOOKS_H */ diff --git a/arch/metag/include/asm/mm-arch-hooks.h b/arch/metag/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..b0072b2eb0de --- /dev/null +++ b/arch/metag/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_METAG_MM_ARCH_HOOKS_H +#define _ASM_METAG_MM_ARCH_HOOKS_H + +#endif /* _ASM_METAG_MM_ARCH_HOOKS_H */ diff --git a/arch/microblaze/include/asm/mm-arch-hooks.h b/arch/microblaze/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..5c4065911bda --- /dev/null +++ b/arch/microblaze/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_MICROBLAZE_MM_ARCH_HOOKS_H +#define _ASM_MICROBLAZE_MM_ARCH_HOOKS_H + +#endif /* _ASM_MICROBLAZE_MM_ARCH_HOOKS_H */ diff --git a/arch/mips/include/asm/mm-arch-hooks.h b/arch/mips/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..b5609fe8e475 --- /dev/null +++ b/arch/mips/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_MIPS_MM_ARCH_HOOKS_H +#define _ASM_MIPS_MM_ARCH_HOOKS_H + +#endif /* _ASM_MIPS_MM_ARCH_HOOKS_H */ diff --git a/arch/mn10300/include/asm/mm-arch-hooks.h b/arch/mn10300/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..e2029a652f4c --- /dev/null +++ b/arch/mn10300/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_MN10300_MM_ARCH_HOOKS_H +#define _ASM_MN10300_MM_ARCH_HOOKS_H + +#endif /* _ASM_MN10300_MM_ARCH_HOOKS_H */ diff --git a/arch/nios2/include/asm/mm-arch-hooks.h b/arch/nios2/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..d7290dc68558 --- /dev/null +++ b/arch/nios2/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_NIOS2_MM_ARCH_HOOKS_H +#define _ASM_NIOS2_MM_ARCH_HOOKS_H + +#endif /* _ASM_NIOS2_MM_ARCH_HOOKS_H */ diff --git a/arch/openrisc/include/asm/mm-arch-hooks.h b/arch/openrisc/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..6d33cb555fe1 --- /dev/null +++ b/arch/openrisc/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_OPENRISC_MM_ARCH_HOOKS_H +#define _ASM_OPENRISC_MM_ARCH_HOOKS_H + +#endif /* _ASM_OPENRISC_MM_ARCH_HOOKS_H */ diff --git a/arch/parisc/include/asm/mm-arch-hooks.h b/arch/parisc/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..654ec63b0ee9 --- /dev/null +++ b/arch/parisc/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_PARISC_MM_ARCH_HOOKS_H +#define _ASM_PARISC_MM_ARCH_HOOKS_H + +#endif /* _ASM_PARISC_MM_ARCH_HOOKS_H */ diff --git a/arch/powerpc/include/asm/mm-arch-hooks.h b/arch/powerpc/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..63091a19de9f --- /dev/null +++ b/arch/powerpc/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_POWERPC_MM_ARCH_HOOKS_H +#define _ASM_POWERPC_MM_ARCH_HOOKS_H + +#endif /* _ASM_POWERPC_MM_ARCH_HOOKS_H */ diff --git a/arch/s390/include/asm/mm-arch-hooks.h b/arch/s390/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..07680b2f3c59 --- /dev/null +++ b/arch/s390/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_S390_MM_ARCH_HOOKS_H +#define _ASM_S390_MM_ARCH_HOOKS_H + +#endif /* _ASM_S390_MM_ARCH_HOOKS_H */ diff --git a/arch/score/include/asm/mm-arch-hooks.h b/arch/score/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..5e38689f189a --- /dev/null +++ b/arch/score/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_SCORE_MM_ARCH_HOOKS_H +#define _ASM_SCORE_MM_ARCH_HOOKS_H + +#endif /* _ASM_SCORE_MM_ARCH_HOOKS_H */ diff --git a/arch/sh/include/asm/mm-arch-hooks.h b/arch/sh/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..18087298b728 --- /dev/null +++ b/arch/sh/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_SH_MM_ARCH_HOOKS_H +#define _ASM_SH_MM_ARCH_HOOKS_H + +#endif /* _ASM_SH_MM_ARCH_HOOKS_H */ diff --git a/arch/sparc/include/asm/mm-arch-hooks.h b/arch/sparc/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..b89ba44c16f1 --- /dev/null +++ b/arch/sparc/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_SPARC_MM_ARCH_HOOKS_H +#define _ASM_SPARC_MM_ARCH_HOOKS_H + +#endif /* _ASM_SPARC_MM_ARCH_HOOKS_H */ diff --git a/arch/tile/include/asm/mm-arch-hooks.h b/arch/tile/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..d1709ea774f7 --- /dev/null +++ b/arch/tile/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_TILE_MM_ARCH_HOOKS_H +#define _ASM_TILE_MM_ARCH_HOOKS_H + +#endif /* _ASM_TILE_MM_ARCH_HOOKS_H */ diff --git a/arch/um/include/asm/mm-arch-hooks.h b/arch/um/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..a7c8b0dfdd4e --- /dev/null +++ b/arch/um/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_UM_MM_ARCH_HOOKS_H +#define _ASM_UM_MM_ARCH_HOOKS_H + +#endif /* _ASM_UM_MM_ARCH_HOOKS_H */ diff --git a/arch/unicore32/include/asm/mm-arch-hooks.h b/arch/unicore32/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..4d79a850c509 --- /dev/null +++ b/arch/unicore32/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_UNICORE32_MM_ARCH_HOOKS_H +#define _ASM_UNICORE32_MM_ARCH_HOOKS_H + +#endif /* _ASM_UNICORE32_MM_ARCH_HOOKS_H */ diff --git a/arch/x86/include/asm/mm-arch-hooks.h b/arch/x86/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..4e881a342236 --- /dev/null +++ b/arch/x86/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_X86_MM_ARCH_HOOKS_H +#define _ASM_X86_MM_ARCH_HOOKS_H + +#endif /* _ASM_X86_MM_ARCH_HOOKS_H */ diff --git a/arch/xtensa/include/asm/mm-arch-hooks.h b/arch/xtensa/include/asm/mm-arch-hooks.h new file mode 100644 index 000000000000..d2e5cfd3dd02 --- /dev/null +++ b/arch/xtensa/include/asm/mm-arch-hooks.h @@ -0,0 +1,15 @@ +/* + * Architecture specific mm hooks + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_XTENSA_MM_ARCH_HOOKS_H +#define _ASM_XTENSA_MM_ARCH_HOOKS_H + +#endif /* _ASM_XTENSA_MM_ARCH_HOOKS_H */ diff --git a/include/linux/mm-arch-hooks.h b/include/linux/mm-arch-hooks.h new file mode 100644 index 000000000000..63005e367abd --- /dev/null +++ b/include/linux/mm-arch-hooks.h @@ -0,0 +1,16 @@ +/* + * Generic mm no-op hooks. + * + * Copyright (C) 2015, IBM Corporation + * Author: Laurent Dufour + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_MM_ARCH_HOOKS_H +#define _LINUX_MM_ARCH_HOOKS_H + +#include + +#endif /* _LINUX_MM_ARCH_HOOKS_H */ -- cgit v1.2.3 From 83d3f0e90c6c8f833e3da91917c243a916fda69e Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Wed, 24 Jun 2015 16:56:22 -0700 Subject: powerpc/mm: tracking vDSO remap Some processes (CRIU) are moving the vDSO area using the mremap system call. As a consequence the kernel reference to the vDSO base address is no more valid and the signal return frame built once the vDSO has been moved is not pointing to the new sigreturn address. This patch handles vDSO remapping and unmapping. Signed-off-by: Laurent Dufour Reviewed-by: Ingo Molnar Cc: "Kirill A. 
Shutemov" Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/mm-arch-hooks.h | 13 +++++++++++++ arch/powerpc/include/asm/mmu_context.h | 23 ++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mm-arch-hooks.h b/arch/powerpc/include/asm/mm-arch-hooks.h index 63091a19de9f..f2a2da895897 100644 --- a/arch/powerpc/include/asm/mm-arch-hooks.h +++ b/arch/powerpc/include/asm/mm-arch-hooks.h @@ -12,4 +12,17 @@ #ifndef _ASM_POWERPC_MM_ARCH_HOOKS_H #define _ASM_POWERPC_MM_ARCH_HOOKS_H +static inline void arch_remap(struct mm_struct *mm, + unsigned long old_start, unsigned long old_end, + unsigned long new_start, unsigned long new_end) +{ + /* + * mremap() doesn't allow moving multiple vmas so we can limit the + * check to old_start == vdso_base. + */ + if (old_start == mm->context.vdso_base) + mm->context.vdso_base = new_start; +} +#define arch_remap arch_remap + #endif /* _ASM_POWERPC_MM_ARCH_HOOKS_H */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 3e5184210d9b..878c27771717 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -8,7 +8,6 @@ #include #include #include -#include #include /* @@ -127,5 +126,27 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } +static inline void arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) +{ +} + +static inline void arch_exit_mmap(struct mm_struct *mm) +{ +} + +static inline void arch_unmap(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (start <= mm->context.vdso_base && mm->context.vdso_base < end) + mm->context.vdso_base = 0; +} + +static inline void arch_bprm_mm_init(struct mm_struct *mm, + struct vm_area_struct *vma) +{ +} + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ -- cgit v1.2.3 From a67a31fa308a9032ead31b0501dafdb44ccf5a12 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Wed, 24 Jun 2015 16:56:25 -0700 Subject: mm/hugetlb: reduce arch dependent code about hugetlb_prefault_arch_hook Currently we have many duplicates in definitions of hugetlb_prefault_arch_hook. In all architectures this function is empty. 
Signed-off-by: Zhang Zhen Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/hugetlb.h | 4 ---- arch/arm64/include/asm/hugetlb.h | 4 ---- arch/ia64/include/asm/hugetlb.h | 4 ---- arch/metag/include/asm/hugetlb.h | 4 ---- arch/mips/include/asm/hugetlb.h | 4 ---- arch/powerpc/include/asm/hugetlb.h | 5 ----- arch/s390/include/asm/hugetlb.h | 1 - arch/sh/include/asm/hugetlb.h | 3 --- arch/sparc/include/asm/hugetlb.h | 4 ---- arch/tile/include/asm/hugetlb.h | 4 ---- arch/x86/include/asm/hugetlb.h | 3 --- fs/hugetlbfs/inode.c | 1 - 12 files changed, 41 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 1f1b1cd112f3..31bb7dccb971 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -53,10 +53,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline int huge_pte_none(pte_t pte) { return pte_none(pte); diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 5b7ca8ace95f..734c17e89e94 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -86,10 +86,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline int huge_pte_none(pte_t pte) { return pte_none(pte); diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index aa910054b8e7..ff1377bc02a6 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -20,10 +20,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE); } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h index 471f481e67f3..f730b396d79b 100644 --- a/arch/metag/include/asm/hugetlb.h +++ b/arch/metag/include/asm/hugetlb.h @@ -14,10 +14,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len); -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index fe0d15d32660..4a5bb5453408 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -38,10 +38,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 1d53a65b4ec1..4bbd3c8c2888 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -112,11 +112,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - - static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { diff --git 
a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 11eae5f55b70..dfb542ade6b1 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -35,7 +35,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -#define hugetlb_prefault_arch_hook(mm) do { } while (0) #define arch_clear_hugepage_flags(page) do { } while (0) int arch_prepare_hugepage(struct page *page); diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index 699255d6d1c6..b788a9bc8918 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -26,9 +26,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { -} - static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index e4cab465b81f..3130d7636312 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -11,10 +11,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index 3257733003f8..1abd00c55236 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -40,10 +40,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 68c05398bba9..dab7a3a750bf 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -26,9 +26,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { -} - static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 87724c1d7be6..0cf74df68617 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -130,7 +130,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) goto out; ret = 0; - hugetlb_prefault_arch_hook(vma->vm_mm); if (vma->vm_flags & VM_WRITE && inode->i_size < len) inode->i_size = len; out: -- cgit v1.2.3 From 15a25b2ead5f97c5a63c169186e294b41ce03f9a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 24 Jun 2015 16:57:39 -0700 Subject: mm/thp: split out pmd collapse flush into separate functions Architectures like ppc64 [1] need to do special things while clearing pmd before a collapse. For them this operation is largely different from a normal hugepage pte clear. Hence add a separate function to clear pmd before collapse. After this patch pmdp_* functions operate only on hugepage pte, and not on regular pmd_t values pointing to page table. [1] ppc64 needs to invalidate all the normal page pte mappings we already have inserted in the hardware hash page table. 
But before doing that we need to make sure there are no parallel hash page table insert going on. So we need to do a kick_all_cpus_sync() before flushing the older hash table entries. By moving this to a separate function we capture these details and mention how it is different from a hugepage pte clear. This patch is a cleanup and only does code movement for clarity. There should not be any change in functionality. Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andrea Arcangeli Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/pgtable-ppc64.h | 4 ++ arch/powerpc/mm/pgtable_64.c | 76 +++++++++++++++++--------------- include/asm-generic/pgtable.h | 21 +++++++++ mm/huge_memory.c | 2 +- 4 files changed, 67 insertions(+), 36 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index f890f7ce1593..5f3a33784259 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -592,6 +592,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, extern void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); +extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); +#define pmdp_collapse_flush pmdp_collapse_flush + #define __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 6bfadf1aa5cb..049d961802aa 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -560,41 +560,47 @@ pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - if (pmd_trans_huge(*pmdp)) { - pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); - } else { - /* - * khugepaged calls this for normal pmd - */ - pmd = *pmdp; - pmd_clear(pmdp); - /* - * Wait for all pending hash_page to finish. This is needed - * in case of subpage collapse. When we collapse normal pages - * to hugepage, we first clear the pmd, then invalidate all - * the PTE entries. The assumption here is that any low level - * page fault will see a none pmd and take the slow path that - * will wait on mmap_sem. But we could very well be in a - * hash_page with local ptep pointer value. Such a hash page - * can result in adding new HPTE entries for normal subpages. - * That means we could be modifying the page content as we - * copy them to a huge page. So wait for parallel hash_page - * to finish before invalidating HPTE entries. We can do this - * by sending an IPI to all the cpus and executing a dummy - * function there. - */ - kick_all_cpus_sync(); - /* - * Now invalidate the hpte entries in the range - * covered by pmd. This make sure we take a - * fault and will find the pmd as none, which will - * result in a major fault which takes mmap_sem and - * hence wait for collapse to complete. Without this - * the __collapse_huge_page_copy can result in copying - * the old content. 
- */ - flush_tlb_pmd_range(vma->vm_mm, &pmd, address); - } + VM_BUG_ON(!pmd_trans_huge(*pmdp)); + pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); + return pmd; +} + +pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t pmd; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(pmd_trans_huge(*pmdp)); + + pmd = *pmdp; + pmd_clear(pmdp); + /* + * Wait for all pending hash_page to finish. This is needed + * in case of subpage collapse. When we collapse normal pages + * to hugepage, we first clear the pmd, then invalidate all + * the PTE entries. The assumption here is that any low level + * page fault will see a none pmd and take the slow path that + * will wait on mmap_sem. But we could very well be in a + * hash_page with local ptep pointer value. Such a hash page + * can result in adding new HPTE entries for normal subpages. + * That means we could be modifying the page content as we + * copy them to a huge page. So wait for parallel hash_page + * to finish before invalidating HPTE entries. We can do this + * by sending an IPI to all the cpus and executing a dummy + * function there. + */ + kick_all_cpus_sync(); + /* + * Now invalidate the hpte entries in the range + * covered by pmd. This make sure we take a + * fault and will find the pmd as none, which will + * result in a major fault which takes mmap_sem and + * hence wait for collapse to complete. Without this + * the __collapse_huge_page_copy can result in copying + * the old content. + */ + flush_tlb_pmd_range(vma->vm_mm, &pmd, address); return pmd; } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index bd910ceaccfa..d2c2171d1c32 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -189,6 +189,27 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif +#ifndef pmdp_collapse_flush +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return pmdp_clear_flush(vma, address, pmdp); +} +#define pmdp_collapse_flush pmdp_collapse_flush +#else +static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + BUILD_BUG(); + return *pmdp; +} +#define pmdp_collapse_flush pmdp_collapse_flush +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif + #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b3d8cd8d6968..65dd8d67287b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2499,7 +2499,7 @@ static void collapse_huge_page(struct mm_struct *mm, * huge and small TLB entries for the same virtual address * to avoid the risk of CPU bugs in that area. */ - _pmd = pmdp_clear_flush(vma, address, pmd); + _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); -- cgit v1.2.3 From f28b6ff8c3d48af21354ef30164c8ccea69d5928 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 24 Jun 2015 16:57:42 -0700 Subject: powerpc/mm: use generic version of pmdp_clear_flush() Also move the pmd_trans_huge check to generic code. Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. 
Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andrea Arcangeli Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/pgtable-ppc64.h | 4 ---- arch/powerpc/mm/pgtable_64.c | 11 ----------- arch/s390/include/asm/pgtable.h | 8 ++++++++ include/asm-generic/pgtable.h | 9 ++------- mm/pgtable-generic.c | 17 +++++++++++++++++ 5 files changed, 27 insertions(+), 22 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 5f3a33784259..34ffbf52d078 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -573,10 +573,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, extern pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_CLEAR_FLUSH -extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp); - #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 049d961802aa..f7775193d745 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -554,17 +554,6 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, return old; } -pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - pmd_t pmd; - - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(!pmd_trans_huge(*pmdp)); - pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); - return pmd; -} - pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0bb2da79adf3..7242dd386ab7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1548,6 +1548,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } } +static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return pmdp_get_and_clear(vma->vm_mm, address, pmdp); +} +#define pmdp_collapse_flush pmdp_collapse_flush + #define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot)) #define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index d2c2171d1c32..f1108a72a672 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -191,13 +191,8 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma, #ifndef pmdp_collapse_flush #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) -{ - return pmdp_clear_flush(vma, address, pmdp); -} -#define pmdp_collapse_flush pmdp_collapse_flush +extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); #else static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index c25f94b33811..f21dc5fbc6cd 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -126,6 +126,7 @@ pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(!pmd_trans_huge(*pmdp)); pmd = 
pmdp_get_and_clear(vma->vm_mm, address, pmdp); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; @@ -198,3 +199,19 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif + +#ifndef pmdp_collapse_flush +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t pmd; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + VM_BUG_ON(pmd_trans_huge(*pmdp)); + pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + return pmd; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif -- cgit v1.2.3 From 8809aa2d28d74111ff2f1928edaa4e9845c97a7d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 24 Jun 2015 16:57:44 -0700 Subject: mm: clarify that the function operates on hugepage pte We have confusing functions to clear pmd, pmd_clear_* and pmd_clear. Add _huge_ to pmdp_clear functions so that we are clear that they operate on hugepage pte. We don't bother about other functions like pmdp_set_wrprotect, pmdp_clear_flush_young, because they operate on PTE bits and hence indicate they are operating on hugepage ptes Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Andrea Arcangeli Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgtable.h | 8 ++++---- arch/powerpc/include/asm/pgtable-ppc64.h | 6 +++--- arch/powerpc/mm/pgtable_64.c | 4 ++-- arch/s390/include/asm/pgtable.h | 24 ++++++++++++------------ arch/sparc/include/asm/pgtable_64.h | 8 ++++---- arch/tile/include/asm/pgtable.h | 8 ++++---- arch/x86/include/asm/pgtable.h | 4 ++-- include/asm-generic/pgtable.h | 18 +++++++++--------- include/linux/mmu_notifier.h | 12 ++++++------ mm/huge_memory.c | 16 ++++++++-------- mm/migrate.c | 2 +- mm/pgtable-generic.c | 14 +++++++++----- mm/rmap.c | 2 +- 13 files changed, 65 insertions(+), 61 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 819af9d057a8..9d8106758142 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -568,12 +568,12 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) } /* - * The generic version pmdp_get_and_clear uses a version of pmd_clear() with a + * The generic version pmdp_huge_get_and_clear uses a version of pmd_clear() with a * different prototype. 
*/ -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) { pmd_t old = *pmdp; diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 34ffbf52d078..3bb7488bd24b 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -569,9 +569,9 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -extern pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp); #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f7775193d745..876232d64126 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -812,8 +812,8 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, return; } -pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp) +pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) { pmd_t old_pmd; pgtable_t pgtable; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7242dd386ab7..f66d82798a6a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1498,9 +1498,9 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, return pmd_young(pmd); } -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) { pmd_t pmd = *pmdp; @@ -1509,10 +1509,10 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, return pmd; } -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL -static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, - unsigned long address, - pmd_t *pmdp, int full) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL +static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp, int full) { pmd_t pmd = *pmdp; @@ -1522,11 +1522,11 @@ static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, return pmd; } -#define __HAVE_ARCH_PMDP_CLEAR_FLUSH -static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH +static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) { - return pmdp_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); } #define __HAVE_ARCH_PMDP_INVALIDATE @@ -1552,7 +1552,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - return pmdp_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); } #define pmdp_collapse_flush pmdp_collapse_flush diff --git 
a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 2a52c91d2c8a..131d36fcd07a 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -865,10 +865,10 @@ static inline unsigned long pud_pfn(pud_t pud) void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm); -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long addr, - pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, + pmd_t *pmdp) { pmd_t pmd = *pmdp; set_pmd_at(mm, addr, pmdp, __pmd(0UL)); diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 95a4f19d16c5..2b05ccbebed9 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -414,10 +414,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, - pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp) { return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp))); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2562e303405b..867da5bbb4a3 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -805,8 +805,8 @@ static inline int pmd_write(pmd_t pmd) return pmd_flags(pmd) & _PAGE_RW; } -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { pmd_t pmd = native_pmdp_get_and_clear(pmdp); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f1108a72a672..29c57b2cb344 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -96,11 +96,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif -#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR +#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, - pmd_t *pmdp) +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp) { pmd_t pmd = *pmdp; pmd_clear(pmdp); @@ -109,13 +109,13 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL +#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, +static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, unsigned long address, pmd_t *pmdp, int full) { - return pmdp_get_and_clear(mm, address, pmdp); + return pmdp_huge_get_and_clear(mm, address, pmdp); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -152,8 +152,8 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma, pte_t *ptep); #endif -#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH -extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, +#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH +extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); #endif 
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 95243d28a0ee..61cd67f4d788 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -324,25 +324,25 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pte; \ }) -#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \ +#define pmdp_huge_clear_flush_notify(__vma, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ struct mm_struct *___mm = (__vma)->vm_mm; \ pmd_t ___pmd; \ \ - ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \ + ___pmd = pmdp_huge_clear_flush(__vma, __haddr, __pmd); \ mmu_notifier_invalidate_range(___mm, ___haddr, \ ___haddr + HPAGE_PMD_SIZE); \ \ ___pmd; \ }) -#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd) \ +#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ pmd_t ___pmd; \ \ - ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd); \ + ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ mmu_notifier_invalidate_range(__mm, ___haddr, \ ___haddr + HPAGE_PMD_SIZE); \ \ @@ -428,8 +428,8 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_flush_notify ptep_clear_flush -#define pmdp_clear_flush_notify pmdp_clear_flush -#define pmdp_get_and_clear_notify pmdp_get_and_clear +#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush +#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 65dd8d67287b..c107094f79ba 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1031,7 +1031,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, goto out_free_pages; VM_BUG_ON_PAGE(!PageHead(page), page); - pmdp_clear_flush_notify(vma, haddr, pmd); + pmdp_huge_clear_flush_notify(vma, haddr, pmd); /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); @@ -1174,7 +1174,7 @@ alloc: pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - pmdp_clear_flush_notify(vma, haddr, pmd); + pmdp_huge_clear_flush_notify(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); mem_cgroup_commit_charge(new_page, memcg, false); lru_cache_add_active_or_unevictable(new_page, vma); @@ -1396,12 +1396,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t orig_pmd; /* * For architectures like ppc64 we look at deposited pgtable - * when calling pmdp_get_and_clear. So do the + * when calling pmdp_huge_get_and_clear. So do the * pgtable_trans_huge_withdraw after finishing pmdp related * operations. 
*/ - orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd, - tlb->fullmm); + orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd, + tlb->fullmm); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { @@ -1459,7 +1459,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); - pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); + pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); if (pmd_move_must_withdraw(new_ptl, old_ptl)) { @@ -1505,7 +1505,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, } if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_get_and_clear_notify(mm, addr, pmd); + entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); entry = pmd_modify(entry, newprot); if (preserve_write) entry = pmd_mkwrite(entry); @@ -2863,7 +2863,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pmd_t _pmd; int i; - pmdp_clear_flush_notify(vma, haddr, pmd); + pmdp_huge_clear_flush_notify(vma, haddr, pmd); /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); diff --git a/mm/migrate.c b/mm/migrate.c index d4fe1f94120b..ee401e4e5ef1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1799,7 +1799,7 @@ fail_putback: */ flush_cache_range(vma, mmun_start, mmun_end); page_add_anon_rmap(new_page, vma, mmun_start); - pmdp_clear_flush_notify(vma, mmun_start, pmd); + pmdp_huge_clear_flush_notify(vma, mmun_start, pmd); set_pmd_at(mm, mmun_start, pmd, entry); flush_tlb_range(vma, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index f21dc5fbc6cd..6b674e00153c 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -119,15 +119,15 @@ pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, } #endif -#ifndef __HAVE_ARCH_PMDP_CLEAR_FLUSH +#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH #ifdef CONFIG_TRANSPARENT_HUGEPAGE -pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) +pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(!pmd_trans_huge(*pmdp)); - pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } @@ -205,11 +205,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + /* + * pmd and hugepage pte format are same. So we could + * use the same function. 
+ */ pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); VM_BUG_ON(pmd_trans_huge(*pmdp)); - pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); + pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } diff --git a/mm/rmap.c b/mm/rmap.c index 9f47f152b01e..7af1ecb21ccb 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -625,7 +625,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) pmd = pmd_offset(pud, address); /* - * Some THP functions use the sequence pmdp_clear_flush(), set_pmd_at() + * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at() * without holding anon_vma lock for write. So when looking for a * genuine pmde (in which to find pte), test present and !THP together. */ -- cgit v1.2.3 From 08bd4fc15683b9a26b9be7048d151f4ddadad96f Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:44 -0700 Subject: mm/hugetlb: remove arch_prepare/release_hugepage from arch headers Nobody used these hooks so they were removed from common code, and can now be removed from the architectures. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Acked-by: Ralf Baechle Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/hugetlb.h | 9 --------- arch/arm64/include/asm/hugetlb.h | 9 --------- arch/ia64/include/asm/hugetlb.h | 9 --------- arch/metag/include/asm/hugetlb.h | 9 --------- arch/mips/include/asm/hugetlb.h | 9 --------- arch/powerpc/include/asm/hugetlb.h | 9 --------- arch/sh/include/asm/hugetlb.h | 9 --------- arch/sparc/include/asm/hugetlb.h | 9 --------- arch/tile/include/asm/hugetlb.h | 9 --------- arch/x86/include/asm/hugetlb.h | 9 --------- 10 files changed, 90 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 31bb7dccb971..7d26f6c4f0f5 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -63,15 +63,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 734c17e89e94..2fd9b14ca295 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -96,15 +96,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index ff1377bc02a6..ef65f026b11e 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -65,15 +65,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h index f730b396d79b..905ed422dbeb 100644 --- 
a/arch/metag/include/asm/hugetlb.h +++ b/arch/metag/include/asm/hugetlb.h @@ -67,15 +67,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index 4a5bb5453408..982bc0685330 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -110,15 +110,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 4bbd3c8c2888..7eac89b9f02e 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -168,15 +168,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index b788a9bc8918..ef489a56fcce 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -79,15 +79,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 3130d7636312..139e711ff80c 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -78,15 +78,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index 1abd00c55236..2fac5be4de26 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -94,15 +94,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index dab7a3a750bf..f8a29d2c97b0 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -80,15 +80,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } -- cgit v1.2.3 From d8ea782b56d9d2c46a47b3231cfd16ecfb538c60 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Mon, 29 Jun 2015 10:47:55 +0530 Subject: 
powerpc/powernv: Fix vma page prot flags in opal-prd driver opal-prd driver will mmap() firmware code/data area as private mapping to prd user space daemon. Write to this page will trigger COW faults. The new COW pages are normal kernel RAM pages accounted by the kernel and are not special. vma->vm_page_prot value will be used at page fault time for the new COW pages, while pgprot_t value passed in remap_pfn_range() is used for the initial page table entry. Hence: * Do not add _PAGE_SPECIAL in vma, but only for remap_pfn_range() * Also remap_pfn_range() will add the _PAGE_SPECIAL flag using pte_mkspecial() call, hence no need to specify in the driver This fix resolves the page accounting warning shown below: BUG: Bad rss-counter state mm:c0000007d34ac600 idx:1 val:19 The above warning is triggered since _PAGE_SPECIAL was incorrectly being set for the normal kernel COW pages. Signed-off-by: Vaidyanathan Srinivasan Acked-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-prd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 46cb3feb0a13..4ece8e40dd54 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -112,6 +112,7 @@ static int opal_prd_open(struct inode *inode, struct file *file) static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; + pgprot_t page_prot; int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", @@ -125,13 +126,11 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) if (!opal_prd_range_is_valid(addr, size)) return -EINVAL; - vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, - vma->vm_pgoff, - size, vma->vm_page_prot)) - | _PAGE_SPECIAL); + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, - vma->vm_page_prot); + page_prot); return rc; } -- cgit v1.2.3 From 27ea2c420cad57386c25668cb9a9f0f082635586 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 15 Jun 2015 13:25:19 +1000 Subject: powerpc: Set the correct kernel taint on machine check errors. This means the 'M' flag will work properly when the kernel prints a backtrace. 
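For illustration, a minimal sketch of how tainting in an early machine-check path looks; the function name and body are simplified stand-ins, and only add_taint(), TAINT_MACHINE_CHECK and LOCKDEP_NOW_UNRELIABLE are the real kernel interfaces:

#include <linux/kernel.h>	/* add_taint(), TAINT_MACHINE_CHECK */
#include <linux/ptrace.h>	/* struct pt_regs */

static long example_machine_check_early(struct pt_regs *regs)
{
	/*
	 * Taint as early as possible: TAINT_MACHINE_CHECK shows up as 'M'
	 * in the flags line of any subsequent oops backtrace, and
	 * LOCKDEP_NOW_UNRELIABLE tells lockdep its state can no longer be
	 * trusted after a machine check.
	 */
	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/* ... CPU-specific early machine-check handling would go here ... */
	return 0;
}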
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6530f1b8874d..37de90f8a845 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -297,6 +297,8 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; -- cgit v1.2.3 From eab861a7a52208868637f47a92caa926ddadd9c7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 2 Jul 2015 14:56:20 +1000 Subject: powerpc: Add plain English description for alignment exception oopses If we take an alignment exception which we cannot fix, the oops currently prints: Unable to handle kernel paging request for unknown fault Lets print something more useful: Unable to handle kernel paging request for unaligned access at address 0xc0000000f77bba8f Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6d535973b200..a67c6d781c52 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -529,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Unable to handle kernel paging request for " "instruction fetch\n"); break; + case 0x600: + printk(KERN_ALERT "Unable to handle kernel paging request for " + "unaligned access at address 0x%08lx\n", regs->dar); + break; default: printk(KERN_ALERT "Unable to handle kernel paging request for " "unknown fault\n"); -- cgit v1.2.3 From aaf6fd5c75eb4aa734ec2062deab371633c3655f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 6 Jul 2015 09:40:34 +1000 Subject: powerpc/ppc4xx_hsta_msi: Include ppc-pci.h to fix reference to hose_list An earlier commit referenced 'hose_list' in sysdev/ppc4xx_hsta_msi.c. hose_list is defined in ppc-pci.h, which was not included in that file. Include it, fixing the build for the akebono defconfig used by the kbuild test robot. Fixes: f2c800aaceb6 ("powerpc/ppc4xx_hsta_msi: Move MSI-related ops to pci_controller_ops") Reported-by: kbuild test robot Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 2bc33674ebfc..87f9623ca805 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -18,6 +18,7 @@ #include #include #include +#include struct ppc4xx_hsta_msi { struct device *dev; -- cgit v1.2.3 From a8956a7b7232da5f4ce4a305c72a54cc5e4a8307 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 3 Jul 2015 17:39:12 +1000 Subject: powerpc/powernv: Fix opal-elog interrupt handler The conversion of opal events to a proper irqchip means that handlers are called until the relevant opal event has been cleared by processing it. Events that queue work should therefore use a threaded handler to mask the event until processing is complete. 
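That conversion is the standard request_threaded_irq()/IRQF_ONESHOT idiom: with a NULL primary handler the irq core masks the line and wakes the handler thread, and IRQF_ONESHOT keeps the line masked until the threaded handler returns, so a still-pending level-triggered event cannot re-fire mid-processing. A minimal sketch of that pattern under invented names (my_event_handler, "my-event"), not the opal-elog code itself:

#include <linux/interrupt.h>

/* Runs in process context, so it may sleep and allocate memory. */
static irqreturn_t my_event_handler(int irq, void *data)
{
	/* ... read and consume the pending event ... */
	return IRQ_HANDLED;
}

static int my_event_init(unsigned int irq)
{
	/*
	 * NULL primary handler: the irq core masks the line and wakes the
	 * handler thread.  IRQF_ONESHOT keeps the line masked until
	 * my_event_handler() returns.
	 */
	return request_threaded_irq(irq, NULL, my_event_handler,
				    IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
				    "my-event", NULL);
}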
Signed-off-by: Alistair Popple Tested-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 4949ef0d9400..37f959bf392e 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -237,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) return elog; } -static void elog_work_fn(struct work_struct *work) +static irqreturn_t elog_event(int irq, void *data) { __be64 size; __be64 id; @@ -251,7 +251,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: OPAL log info read failed\n"); - return; + return IRQ_HANDLED; } elog_size = be64_to_cpu(size); @@ -270,16 +270,10 @@ static void elog_work_fn(struct work_struct *work) * entries. */ if (kset_find_obj(elog_kset, name)) - return; + return IRQ_HANDLED; create_elog_obj(log_id, elog_size, elog_type); -} - -static DECLARE_WORK(elog_work, elog_work_fn); -static irqreturn_t elog_event(int irq, void *data) -{ - schedule_work(&elog_work); return IRQ_HANDLED; } @@ -304,8 +298,8 @@ int __init opal_elog_init(void) return irq; } - rc = request_irq(irq, elog_event, - IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); if (rc) { pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, rc); -- cgit v1.2.3 From b32aadc1a8ed84afbe924cd2ced31cd6a2e67074 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Tue, 7 Jul 2015 01:39:23 +0530 Subject: powerpc/powernv: Fix race in updating core_idle_state core_idle_state is maintained for each core. It uses bits 0-7 to track whether a thread in the core has entered fastsleep or winkle, and bit 8 as a lock bit. The lock bit is set in these two scenarios: - The thread is the first in the subcore to wake up from sleep/winkle. - The thread is the last in the core about to enter sleep/winkle. While the lock bit is set, if any other thread in the core wakes up, it loops until the lock bit is cleared before proceeding in the wakeup path. This helps prevent race conditions w.r.t. the fastsleep workaround and prevents threads from switching to process context before core/subcore resources are restored. But, in the path to sleep/winkle entry, we currently don't check the lock bit. This exposes us to the following race when running with subcores on:

First thread in the subcore waking up:
    lwarx   r15,0,r14
    ori     r15,r15,PNV_CORE_IDLE_LOCK_BIT
    stwcx.  r15,0,r14
    [Code to restore subcore state]

Meanwhile, another thread in the same core entering sleep/winkle:
    lwarx   r15,0,r14
    [clear thread bit]
    stwcx.  r15,0,r14

The waking thread then continues with its stale copy of core_idle_state:
    andi.   r15,r15,PNV_CORE_IDLE_THREAD_BITS
    stw     r15,0(r14)

Here, after the thread entering sleep clears its thread bit in core_idle_state, the value is overwritten by the thread waking up. In such cases, when the core enters fastsleep, the code mistakes an idle thread for a running one. Because of this, the first thread waking up from fastsleep, which is supposed to resync the timebase, skips it. So we can end up with a core holding a stale timebase value. This patch fixes the above race by looping on the lock bit even while entering the idle states. Signed-off-by: Shreyas B.
Prabhu Fixes: 7b54e9f213f76 'powernv/powerpc: Add winkle support for offline cpus' Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_power7.S | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce4..112ccf497562 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -51,6 +51,22 @@ .text +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT -- cgit v1.2.3 From 442053e57a4fc58b719b6ceab60f29ef9cf4404c Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Jul 2015 12:21:10 -0400 Subject: powerpc/perf/24x7: Fix lockdep warning The sysfs attributes for the 24x7 counters are dynamically allocated. Initialize the attributes using sysfs_attr_init() to fix the following warning, which occurs when CONFIG_DEBUG_LOCK_ALLOC=y. [ 0.346249] audit: initializing netlink subsys (disabled) [ 0.346284] audit: type=2000 audit(1436295254.340:1): initialized [ 0.346489] BUG: key c0000000efe90198 not in .data! [ 0.346491] DEBUG_LOCKS_WARN_ON(1) [ 0.346502] ------------[ cut here ]------------ [ 0.346504] WARNING: at ../kernel/locking/lockdep.c:3002 [ 0.346506] Modules linked in: Reported-by: Gustavo Luiz Duarte Signed-off-by: Sukadev Bhattiprolu Tested-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20631d1..df956295c2a7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str) if (!attr) return NULL; + sysfs_attr_init(&attr->attr.attr); + attr->var = str; attr->attr.attr.name = name; attr->attr.attr.mode = 0444; -- cgit v1.2.3 From 3ba3a73e9f204ce7904cae7d14be399b467c4fef Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 20 Jul 2015 20:45:51 +1000 Subject: powerpc/powernv/ioda2: Fix calculation for memory allocated for TCE table The existing code stores the amount of memory allocated for a TCE table.
At the moment it uses @offset which is a virtual offset in the TCE table which is only correct for a one level tables and it does not include memory allocated for intermediate levels. When multilevel TCE table is requested, WARN_ON in tce_iommu_create_table() prints a warning. This adds an additional counter to pnv_pci_ioda2_table_do_alloc_pages() to count actually allocated memory. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5738d315248b..85cbc96eff6c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2220,7 +2220,7 @@ static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, unsigned levels, unsigned long limit, - unsigned long *current_offset) + unsigned long *current_offset, unsigned long *total_allocated) { struct page *tce_mem = NULL; __be64 *addr, *tmp; @@ -2236,6 +2236,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, } addr = page_address(tce_mem); memset(addr, 0, allocated); + *total_allocated += allocated; --levels; if (!levels) { @@ -2245,7 +2246,7 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, for (i = 0; i < entries; ++i) { tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, - levels, limit, current_offset); + levels, limit, current_offset, total_allocated); if (!tmp) break; @@ -2267,7 +2268,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, struct iommu_table *tbl) { void *addr; - unsigned long offset = 0, level_shift; + unsigned long offset = 0, level_shift, total_allocated = 0; const unsigned window_shift = ilog2(window_size); unsigned entries_shift = window_shift - page_shift; unsigned table_shift = max_t(unsigned, entries_shift + 3, PAGE_SHIFT); @@ -2286,7 +2287,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, - levels, tce_table_size, &offset); + levels, tce_table_size, &offset, &total_allocated); /* addr==NULL means that the first level allocation failed */ if (!addr) @@ -2308,7 +2309,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, page_shift); tbl->it_level_size = 1ULL << (level_shift - 3); tbl->it_indirect_levels = levels - 1; - tbl->it_allocated_size = offset; + tbl->it_allocated_size = total_allocated; pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n", window_size, tce_table_size, bus_offset); -- cgit v1.2.3 From b8d65e9662b1ffb3b52a65fd11b0b968022dc6a1 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 30 Jul 2015 16:53:54 +1000 Subject: powerpc/eeh-powernv: Fix unbalanced IRQ warning pnv_eeh_next_error() re-enables the eeh opal event interrupt but it gets called from a loop if there are more outstanding events to process, resulting in a warning due to enabling an already enabled interrupt. Instead the interrupt should only be re-enabled once the last outstanding event has been processed. 
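The underlying rule is that enable_irq() calls must pair one-to-one with earlier disable_irq()/disable_irq_nosync() calls; enabling a line that is already enabled triggers the "Unbalanced enable for IRQ" warning from the irq core. A rough sketch of the shape of the fix, with invented names (poll_next_event, fetch_one_event, NO_MORE_EVENTS, event_irq) standing in for the EEH specifics:

#include <linux/interrupt.h>	/* enable_irq() */

/* Hypothetical stand-ins for the EEH-specific pieces: */
static unsigned int event_irq;
enum { MORE_EVENTS, NO_MORE_EVENTS };
static int fetch_one_event(void);

/* Called repeatedly by the caller's loop while events remain pending. */
static int poll_next_event(void)
{
	int ret = fetch_one_event();

	/*
	 * Re-enable the interrupt only once, after the final event has
	 * been drained, rather than on every pass through the loop.
	 */
	if (ret == NO_MORE_EVENTS)
		enable_irq(event_irq);

	return ret;
}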
Tested-by: Daniel Axtens Reported-by: Daniel Axtens Signed-off-by: Alistair Popple Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5cf5e6ea213b..7cf0df859d05 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1478,7 +1478,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) } /* Unmask the event */ - if (eeh_enabled()) + if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) enable_irq(eeh_event_irq); return ret; -- cgit v1.2.3 From 3c00cb5e68dc719f2fc73a33b1b230aadfcb1309 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 6 Aug 2015 15:46:26 -0700 Subject: signal: fix information leak in copy_siginfo_from_user32 This function can leak kernel stack data when the user siginfo_t has a positive si_code value. The top 16 bits of si_code describe which fields in the siginfo_t union are active, but they are treated inconsistently between copy_siginfo_from_user32, copy_siginfo_to_user32 and copy_siginfo_to_user. copy_siginfo_from_user32 is called from rt_sigqueueinfo and rt_tgsigqueueinfo in which the user has full control over the top 16 bits of si_code. This fixes the following information leaks: x86: 8 bytes leaked when sending a signal from a 32-bit process to itself. This leak grows to 16 bytes if the process uses x32. (si_code = __SI_CHLD) x86: 100 bytes leaked when sending a signal from a 32-bit process to a 64-bit process. (si_code = -1) sparc: 4 bytes leaked when sending a signal from a 32-bit process to a 64-bit process. (si_code = any) parisc and s390 have similar bugs, but they are not vulnerable because rt_[tg]sigqueueinfo have checks that prevent sending a positive si_code to a different process. These bugs are also fixed for consistency.
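The crux of the fix is where the zeroing happens: copy_siginfo_from_user32() copies only the compat-sized preamble and pad, so any remaining bytes of the native siginfo_t keep whatever was on the kernel stack unless the caller clears them first. A simplified sketch of the caller-side pattern, mirroring the kernel/signal.c hunk below (do_rt_sigqueueinfo() is the existing internal helper):

COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, compat_pid_t, pid, int, sig,
		       struct compat_siginfo __user *, uinfo)
{
	/*
	 * Zero-initialise: the compat copy below only writes part of the
	 * structure, and the untouched tail must not be stale stack data.
	 */
	siginfo_t info = {};
	int ret = copy_siginfo_from_user32(&info, uinfo);

	if (unlikely(ret))
		return ret;
	return do_rt_sigqueueinfo(pid, sig, &info);
}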
Signed-off-by: Amanieu d'Antras Cc: Oleg Nesterov Cc: Ingo Molnar Cc: Russell King Cc: Ralf Baechle Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Paul Mackerras Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/signal32.c | 2 -- arch/mips/kernel/signal32.c | 2 -- arch/powerpc/kernel/signal_32.c | 2 -- arch/tile/kernel/compat_signal.c | 2 -- kernel/signal.c | 4 ++-- 5 files changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 1670f15ef69e..81fd38f4fbaa 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -201,8 +201,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE)) diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 19a7705f2a01..5d7f2634996f 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d3a831ac0f92..da50e0c9c57e 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) { - memset(to, 0, sizeof *to); - if (copy_from_user(to, from, 3*sizeof(int)) || copy_from_user(to->_sifields._pad, from->_sifields._pad, SI_PAD_SIZE32)) diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index e8c2c04143cd..c667e104a0c2 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo))) return -EFAULT; - memset(to, 0, sizeof(*to)); - err = __get_user(to->si_signo, &from->si_signo); err |= __get_user(to->si_errno, &from->si_errno); err |= __get_user(to->si_code, &from->si_code); diff --git a/kernel/signal.c b/kernel/signal.c index 836df8dac6cc..00524cf6c412 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3017,7 +3017,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; int ret = copy_siginfo_from_user32(&info, uinfo); if (unlikely(ret)) return ret; @@ -3061,7 +3061,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; -- cgit v1.2.3