diff options
Diffstat (limited to 'arch')
339 files changed, 6391 insertions, 4618 deletions
diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 644815c0516e..c64c505d966c 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,11 +102,5 @@ boot_targets += uImage uImage.bin uImage.gz $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -%.dtb %.dtb.S %.dtb.o: scripts - $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@ - -dtbs: scripts - $(Q)$(MAKE) $(build)=$(boot)/dts - archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 5c91e0093ee8..05a91d8b89f3 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -303,12 +303,7 @@ else KBUILD_IMAGE := $(boot)/zImage endif -# Build the DT binary blobs if we have OF configured -ifeq ($(CONFIG_USE_OF),y) -KBUILD_DTBS := dtbs -endif - -all: $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS) +all: $(notdir $(KBUILD_IMAGE)) archheaders: @@ -335,17 +330,6 @@ $(BOOT_TARGETS): vmlinux $(INSTALL_TARGETS): $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@ -%.dtb: | scripts - $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@ - -PHONY += dtbs dtbs_install - -dtbs: prepare scripts - $(Q)$(MAKE) $(build)=$(boot)/dts - -dtbs_install: - $(Q)$(MAKE) $(dtbinst)=$(boot)/dts - PHONY += vdso_install vdso_install: ifeq ($(CONFIG_VDSO),y) @@ -367,8 +351,6 @@ define archhelp echo ' uImage - U-Boot wrapped zImage' echo ' bootpImage - Combined zImage and initial RAM disk' echo ' (supply initrd image via make variable INITRD=<path>)' - echo '* dtbs - Build device tree blobs for enabled boards' - echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' echo ' uinstall - Install U-Boot wrapped compressed kernel' diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index 07437816e098..b36c0289a308 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -6,6 +6,8 @@ #include <linux/string.h> #include <asm/byteorder.h> +#define INT_MAX ((int)(~0U>>1)) + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index ecaa68dd1af5..13bcd3b867cb 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -87,14 +87,11 @@ void __init arm_dt_init_cpu_maps(void) if (!cpus) return; - for_each_child_of_node(cpus, cpu) { + for_each_of_cpu_node(cpu) { const __be32 *cell; int prop_bytes; u32 hwid; - if (of_node_cmp(cpu->type, "cpu")) - continue; - pr_debug(" * %pOF...\n", cpu); /* * A device tree containing CPU nodes with missing "reg" diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 24ac3cab411d..60e375ce1ab2 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -94,12 +94,6 @@ static void __init parse_dt_topology(void) __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), GFP_NOWAIT); - cn = of_find_node_by_path("/cpus"); - if (!cn) { - pr_err("No CPU information found in DT\n"); - return; - } - for_each_possible_cpu(cpu) { const u32 *rate; int len; diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c index 345af3ebcc3a..7efe95bd584f 100644 --- a/arch/arm/mach-shmobile/pm-rcar-gen2.c +++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c @@ -50,7 +50,7 @@ void __init rcar_gen2_pm_init(void) void __iomem *p; u32 bar; static int once; - struct device_node *np, *cpus; + struct device_node *np; bool has_a7 = false; bool has_a15 = false; struct resource res; @@ -59,11 +59,7 @@ void __init rcar_gen2_pm_init(void) if (once++) return; - cpus = of_find_node_by_path("/cpus"); - if (!cpus) - return; - - for_each_child_of_node(cpus, np) { + for_each_of_cpu_node(np) { if (of_device_is_compatible(np, "arm,cortex-a15")) has_a15 = true; else if (of_device_is_compatible(np, "arm,cortex-a7")) diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c index e348bcfe389d..94fdeef11b81 100644 --- a/arch/arm/mach-shmobile/pm-rmobile.c +++ b/arch/arm/mach-shmobile/pm-rmobile.c @@ -202,7 +202,7 @@ static void __init get_special_pds(void) const struct of_device_id *id; /* PM domains containing CPUs */ - for_each_node_by_type(np, "cpu") + for_each_of_cpu_node(np) add_special_pd(np, PD_CPU); /* PM domain containing console */ diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c index 828e8aea037e..e48b0939693f 100644 --- a/arch/arm/mach-shmobile/timer.c +++ b/arch/arm/mach-shmobile/timer.c @@ -22,22 +22,16 @@ void __init shmobile_init_delay(void) { - struct device_node *np, *cpus; + struct device_node *np; u32 max_freq = 0; - cpus = of_find_node_by_path("/cpus"); - if (!cpus) - return; - - for_each_child_of_node(cpus, np) { + for_each_of_cpu_node(np) { u32 freq; if (!of_property_read_u32(np, "clock-frequency", &freq)) max_freq = max(max_freq, freq); } - of_node_put(cpus); - if (!max_freq) return; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c03cd0d765d3..964f682a2b7b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,8 @@ config ARM64 select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER @@ -24,6 +26,8 @@ config ARM64 select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 106039d25e2f..b4e994cd3a42 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -113,9 +113,8 @@ core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a # Default target when executing plain make boot := arch/arm64/boot KBUILD_IMAGE := $(boot)/Image.gz -KBUILD_DTBS := dtbs -all: Image.gz $(KBUILD_DTBS) +all: Image.gz Image: vmlinux @@ -127,17 +126,6 @@ Image.%: Image zinstall install: $(Q)$(MAKE) $(build)=$(boot) $@ -%.dtb: scripts - $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@ - -PHONY += dtbs dtbs_install - -dtbs: prepare scripts - $(Q)$(MAKE) $(build)=$(boot)/dts - -dtbs_install: - $(Q)$(MAKE) $(dtbinst)=$(boot)/dts - PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ @@ -145,7 +133,6 @@ vdso_install: # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) - $(Q)$(MAKE) $(clean)=$(boot)/dts # We need to generate vdso-offsets.h before compiling certain files in kernel/. # In order to do that, we should use the archprepare target, but we can't since @@ -160,8 +147,6 @@ vdso_prepare: prepare0 define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' - echo '* dtbs - Build device tree blobs for enabled boards' - echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)' echo ' install - Install uncompressed kernel' echo ' zinstall - Install compressed kernel' echo ' Install using (your) ~/bin/installkernel or' diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index 8cb78dd99672..90c0faf8579f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -148,6 +148,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; + dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>; clockgen: clocking@1300000 { compatible = "fsl,ls2080a-clockgen"; @@ -321,6 +322,8 @@ reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + iommu-map = <0 &smmu 0 0>; /* This is fixed-up by u-boot */ + dma-coherent; #address-cells = <3>; #size-cells = <1>; @@ -424,6 +427,9 @@ compatible = "arm,mmu-500"; reg = <0 0x5000000 0 0x800000>; #global-interrupts = <12>; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + dma-coherent; interrupts = <0 13 4>, /* global secure fault */ <0 14 4>, /* combined secure interrupt */ <0 15 4>, /* global non-secure fault */ @@ -466,7 +472,6 @@ <0 204 4>, <0 205 4>, <0 206 4>, <0 207 4>, <0 208 4>, <0 209 4>; - mmu-masters = <&fsl_mc 0x300 0>; }; dspi: dspi@2100000 { diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 5a5fa47a6b18..3dd3d664c5c5 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -23,7 +23,6 @@ struct dev_archdata { #ifdef CONFIG_XEN const struct dma_map_ops *dev_dma_ops; #endif - bool dma_coherent; }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index b7847eb8a7bb..c41f3fb1446c 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -44,10 +44,13 @@ void arch_teardown_dma_ops(struct device *dev); #define arch_teardown_dma_ops arch_teardown_dma_ops #endif -/* do not use this function in a driver */ +/* + * Do not use this function in a driver, it is only provided for + * arch/arm/mm/xen.c, which is used by arm64 as well. + */ static inline bool is_device_dma_coherent(struct device *dev) { - return dev->archdata.dma_coherent; + return dev->dma_coherent; } #endif /* __KERNEL__ */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 25fcd22a4bb2..96b8f2f51ab2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -602,7 +602,7 @@ static void __init of_parse_and_init_cpus(void) { struct device_node *dn; - for_each_node_by_type(dn, "cpu") { + for_each_of_cpu_node(dn) { u64 hwid = of_get_cpu_mpidr(dn); if (hwid == INVALID_HWID) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 072c51fb07d7..d190612b8f33 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/genalloc.h> #include <linux/dma-direct.h> +#include <linux/dma-noncoherent.h> #include <linux/dma-contiguous.h> #include <linux/vmalloc.h> #include <linux/swiotlb.h> @@ -32,16 +33,6 @@ #include <asm/cacheflush.h> -static int swiotlb __ro_after_init; - -static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, - bool coherent) -{ - if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE)) - return pgprot_writecombine(prot); - return prot; -} - static struct gen_pool *atomic_pool __ro_after_init; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K @@ -91,18 +82,16 @@ static int __free_from_pool(void *start, size_t size) return 1; } -static void *__dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flags, unsigned long attrs) { struct page *page; void *ptr, *coherent_ptr; - bool coherent = is_device_dma_coherent(dev); - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false); + pgprot_t prot = pgprot_writecombine(PAGE_KERNEL); size = PAGE_ALIGN(size); - if (!coherent && !gfpflags_allow_blocking(flags)) { + if (!gfpflags_allow_blocking(flags)) { struct page *page = NULL; void *addr = __alloc_from_pool(size, &page, flags); @@ -112,14 +101,10 @@ static void *__dma_alloc(struct device *dev, size_t size, return addr; } - ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs); + ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); if (!ptr) goto no_mem; - /* no need for non-cacheable mapping if coherent */ - if (coherent) - return ptr; - /* remove any dirty cache lines on the kernel alias */ __dma_flush_area(ptr, size); @@ -133,130 +118,57 @@ static void *__dma_alloc(struct device *dev, size_t size, return coherent_ptr; no_map: - swiotlb_free(dev, size, ptr, *dma_handle, attrs); + dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs); no_mem: return NULL; } -static void __dma_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) { - void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) { + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); - size = PAGE_ALIGN(size); - - if (!is_device_dma_coherent(dev)) { - if (__free_from_pool(vaddr, size)) - return; vunmap(vaddr); + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); } - swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs); } -static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - dma_addr_t dev_addr; - - dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); - - return dev_addr; -} - - -static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); - swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); + return __phys_to_pfn(dma_to_phys(dev, dma_addr)); } -static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) { - struct scatterlist *sg; - int i, ret; - - ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - for_each_sg(sgl, sg, ret, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); - - return ret; -} - -static void __swiotlb_unmap_sg_attrs(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); - swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); + if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE)) + return pgprot_writecombine(prot); + return prot; } -static void __swiotlb_sync_single_for_cpu(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - if (!is_device_dma_coherent(dev)) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); - swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); + __dma_map_area(phys_to_virt(paddr), size, dir); } -static void __swiotlb_sync_single_for_device(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - swiotlb_sync_single_for_device(dev, dev_addr, size, dir); - if (!is_device_dma_coherent(dev)) - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + __dma_unmap_area(phys_to_virt(paddr), size, dir); } -static void __swiotlb_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +static int __swiotlb_get_sgtable_page(struct sg_table *sgt, + struct page *page, size_t size) { - struct scatterlist *sg; - int i; - - if (!is_device_dma_coherent(dev)) - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); - swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); -} + int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); -static void __swiotlb_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); - if (!is_device_dma_coherent(dev)) - for_each_sg(sgl, sg, nelems, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + return ret; } static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, @@ -277,74 +189,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, return ret; } -static int __swiotlb_mmap(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - int ret; - unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; - - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - return __swiotlb_mmap_pfn(vma, pfn, size); -} - -static int __swiotlb_get_sgtable_page(struct sg_table *sgt, - struct page *page, size_t size) -{ - int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - - return ret; -} - -static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ - struct page *page = phys_to_page(dma_to_phys(dev, handle)); - - return __swiotlb_get_sgtable_page(sgt, page, size); -} - -static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) -{ - if (swiotlb) - return swiotlb_dma_supported(hwdev, mask); - return 1; -} - -static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) -{ - if (swiotlb) - return swiotlb_dma_mapping_error(hwdev, addr); - return 0; -} - -static const struct dma_map_ops arm64_swiotlb_dma_ops = { - .alloc = __dma_alloc, - .free = __dma_free, - .mmap = __swiotlb_mmap, - .get_sgtable = __swiotlb_get_sgtable, - .map_page = __swiotlb_map_page, - .unmap_page = __swiotlb_unmap_page, - .map_sg = __swiotlb_map_sg_attrs, - .unmap_sg = __swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = __swiotlb_sync_single_for_cpu, - .sync_single_for_device = __swiotlb_sync_single_for_device, - .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = __swiotlb_sync_sg_for_device, - .dma_supported = __swiotlb_dma_supported, - .mapping_error = __swiotlb_dma_mapping_error, -}; - static int __init atomic_pool_init(void) { pgprot_t prot = __pgprot(PROT_NORMAL_NC); @@ -500,10 +344,6 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) - swiotlb = 1; - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), TAINT_CPU_OUT_OF_SPEC, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", @@ -528,7 +368,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); size_t iosize = size; void *addr; @@ -569,7 +409,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, addr = NULL; } } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); struct page *page; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, @@ -596,7 +436,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, size >> PAGE_SHIFT); } } else { - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); struct page **pages; pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, @@ -658,8 +498,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, struct vm_struct *area; int ret; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); + vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -709,11 +548,11 @@ static void __iommu_sync_single_for_cpu(struct device *dev, { phys_addr_t phys; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); - __dma_unmap_area(phys_to_virt(phys), size, dir); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); + arch_sync_dma_for_cpu(dev, phys, size, dir); } static void __iommu_sync_single_for_device(struct device *dev, @@ -722,11 +561,11 @@ static void __iommu_sync_single_for_device(struct device *dev, { phys_addr_t phys; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); - __dma_map_area(phys_to_virt(phys), size, dir); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); + arch_sync_dma_for_device(dev, phys, size, dir); } static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, @@ -734,13 +573,13 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = dev_is_dma_coherent(dev); int prot = dma_info_to_prot(dir, coherent, attrs); dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); - if (!iommu_dma_mapping_error(dev, dev_addr) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_single_for_device(dev, dev_addr, size, dir); + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !iommu_dma_mapping_error(dev, dev_addr)) + __dma_map_area(page_address(page) + offset, size, dir); return dev_addr; } @@ -762,11 +601,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg; int i; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(sg_virt(sg), sg->length, dir); + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); } static void __iommu_sync_sg_for_device(struct device *dev, @@ -776,18 +615,18 @@ static void __iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg; int i; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; for_each_sg(sgl, sg, nelems, i) - __dma_map_area(sg_virt(sg), sg->length, dir); + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); } static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, unsigned long attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = dev_is_dma_coherent(dev); if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __iommu_sync_sg_for_device(dev, sgl, nelems, dir); @@ -879,9 +718,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { if (!dev->dma_ops) - dev->dma_ops = &arm64_swiotlb_dma_ops; + dev->dma_ops = &swiotlb_dma_ops; - dev->archdata.dma_coherent = coherent; + dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); #ifdef CONFIG_XEN diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile index 3fe8a948e94c..b7aa854f7008 100644 --- a/arch/c6x/Makefile +++ b/arch/c6x/Makefile @@ -40,9 +40,7 @@ boot := arch/$(ARCH)/boot DTB:=$(subst dtbImage.,,$(filter dtbImage.%, $(MAKECMDGOALS))) export DTB -ifneq ($(DTB),) core-y += $(boot)/dts/ -endif # With make 3.82 we cannot mix normal and wildcard targets diff --git a/arch/c6x/boot/dts/Makefile b/arch/c6x/boot/dts/Makefile index b212d278ebc4..f438285c3640 100644 --- a/arch/c6x/boot/dts/Makefile +++ b/arch/c6x/boot/dts/Makefile @@ -5,15 +5,12 @@ DTC_FLAGS ?= -p 1024 +dtb-$(CONFIG_SOC_TMS320C6455) += dsk6455.dtb +dtb-$(CONFIG_SOC_TMS320C6457) += evmc6457.dtb +dtb-$(CONFIG_SOC_TMS320C6472) += evmc6472.dtb +dtb-$(CONFIG_SOC_TMS320C6474) += evmc6474.dtb +dtb-$(CONFIG_SOC_TMS320C6678) += evmc6678.dtb + ifneq ($(DTB),) -obj-y += linked_dtb.o +obj-y += $(DTB).dtb.o endif - -quiet_cmd_cp = CP $< $@$2 - cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) - -# Generate builtin.dtb from $(DTB).dtb -$(obj)/builtin.dtb: $(obj)/$(DTB).dtb - $(call if_changed,cp) - -$(obj)/linked_dtb.o: $(obj)/builtin.dtb diff --git a/arch/c6x/boot/dts/linked_dtb.S b/arch/c6x/boot/dts/linked_dtb.S deleted file mode 100644 index cf347f1d16ce..000000000000 --- a/arch/c6x/boot/dts/linked_dtb.S +++ /dev/null @@ -1,2 +0,0 @@ -.section __fdt_blob,"a" -.incbin "arch/c6x/boot/dts/builtin.dtb" diff --git a/arch/c6x/include/asm/sections.h b/arch/c6x/include/asm/sections.h index d6c591ab5b7e..dc2f15eb3bde 100644 --- a/arch/c6x/include/asm/sections.h +++ b/arch/c6x/include/asm/sections.h @@ -8,6 +8,5 @@ extern char _vectors_start[]; extern char _vectors_end[]; extern char _data_lma[]; -extern char _fdt_start[], _fdt_end[]; #endif /* _ASM_C6X_SECTIONS_H */ diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 786e36e2f61d..05d96a9541b5 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -96,7 +96,7 @@ static void __init get_cpuinfo(void) unsigned long core_khz; u64 tmp; struct cpuinfo_c6x *p; - struct device_node *node, *np; + struct device_node *node; p = &per_cpu(cpu_data, smp_processor_id()); @@ -190,13 +190,8 @@ static void __init get_cpuinfo(void) p->core_id = get_coreid(); - node = of_find_node_by_name(NULL, "cpus"); - if (node) { - for_each_child_of_node(node, np) - if (!strcmp("cpu", np->name)) - ++c6x_num_cores; - of_node_put(node); - } + for_each_of_cpu_node(node) + ++c6x_num_cores; node = of_find_node_by_name(NULL, "soc"); if (node) { @@ -270,7 +265,7 @@ int __init c6x_add_memory(phys_addr_t start, unsigned long size) notrace void __init machine_init(unsigned long dt_ptr) { void *dtb = __va(dt_ptr); - void *fdt = _fdt_start; + void *fdt = __dtb_start; /* interrupts must be masked */ set_creg(IER, 2); @@ -363,7 +358,7 @@ void __init setup_arch(char **cmdline_p) memory_end >> PAGE_SHIFT); memblock_reserve(memory_start, bootmap_size); - unflatten_device_tree(); + unflatten_and_copy_device_tree(); c6x_cache_init(); diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S index 1fba5b421eee..584bab2bace6 100644 --- a/arch/c6x/kernel/vmlinux.lds.S +++ b/arch/c6x/kernel/vmlinux.lds.S @@ -90,16 +90,6 @@ SECTIONS *(.switch) } - . = ALIGN (8) ; - __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) - { - _fdt_start = . ; /* place for fdt blob */ - *(__fdt_blob) ; /* Any link-placed DTB */ - BYTE(0); /* section always has contents */ - . = _fdt_start + 0x4000; /* Pad up to 16kbyte */ - _fdt_end = . ; - } - _etext = .; /* diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index 58634e6bae92..4003ddc616e1 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -31,21 +31,12 @@ CROSS_COMPILE := h8300-unknown-linux- endif core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/ -ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""' -core-y += arch/h8300/boot/dts/ -endif +core-y += arch/$(ARCH)/boot/dts/ libs-y += arch/$(ARCH)/lib/ boot := arch/h8300/boot -%.dtb %.dtb.S %.dtb.o: | scripts - $(Q)$(MAKE) $(build)=arch/h8300/boot/dts arch/h8300/boot/dts/$@ - -PHONY += dtbs -dtbs: scripts - $(Q)$(MAKE) $(build)=arch/h8300/boot/dts - archmrproper: archclean: diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c index 1b083c500b9a..ebb3b6d169ea 100644 --- a/arch/m68k/mac/misc.c +++ b/arch/m68k/mac/misc.c @@ -37,35 +37,6 @@ static void (*rom_reset)(void); #ifdef CONFIG_ADB_CUDA -static time64_t cuda_read_time(void) -{ - struct adb_request req; - time64_t time; - - if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) - return 0; - while (!req.complete) - cuda_poll(); - - time = (u32)((req.reply[3] << 24) | (req.reply[4] << 16) | - (req.reply[5] << 8) | req.reply[6]); - - return time - RTC_OFFSET; -} - -static void cuda_write_time(time64_t time) -{ - struct adb_request req; - u32 data = lower_32_bits(time + RTC_OFFSET); - - if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, - (data >> 24) & 0xFF, (data >> 16) & 0xFF, - (data >> 8) & 0xFF, data & 0xFF) < 0) - return; - while (!req.complete) - cuda_poll(); -} - static __u8 cuda_read_pram(int offset) { struct adb_request req; @@ -91,33 +62,6 @@ static void cuda_write_pram(int offset, __u8 data) #endif /* CONFIG_ADB_CUDA */ #ifdef CONFIG_ADB_PMU -static time64_t pmu_read_time(void) -{ - struct adb_request req; - time64_t time; - - if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) - return 0; - pmu_wait_complete(&req); - - time = (u32)((req.reply[0] << 24) | (req.reply[1] << 16) | - (req.reply[2] << 8) | req.reply[3]); - - return time - RTC_OFFSET; -} - -static void pmu_write_time(time64_t time) -{ - struct adb_request req; - u32 data = lower_32_bits(time + RTC_OFFSET); - - if (pmu_request(&req, NULL, 5, PMU_SET_RTC, - (data >> 24) & 0xFF, (data >> 16) & 0xFF, - (data >> 8) & 0xFF, data & 0xFF) < 0) - return; - pmu_wait_complete(&req); -} - static __u8 pmu_read_pram(int offset) { struct adb_request req; @@ -295,13 +239,17 @@ static time64_t via_read_time(void) * is basically any machine with Mac II-style ADB. */ -static void via_write_time(time64_t time) +static void via_set_rtc_time(struct rtc_time *tm) { union { __u8 cdata[4]; __u32 idata; } data; __u8 temp; + time64_t time; + + time = mktime64(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); /* Clear the write protect bit */ @@ -641,12 +589,12 @@ int mac_hwclk(int op, struct rtc_time *t) #ifdef CONFIG_ADB_CUDA case MAC_ADB_EGRET: case MAC_ADB_CUDA: - now = cuda_read_time(); + now = cuda_get_time(); break; #endif #ifdef CONFIG_ADB_PMU case MAC_ADB_PB2: - now = pmu_read_time(); + now = pmu_get_time(); break; #endif default: @@ -665,24 +613,21 @@ int mac_hwclk(int op, struct rtc_time *t) __func__, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, t->tm_hour, t->tm_min, t->tm_sec); - now = mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, - t->tm_hour, t->tm_min, t->tm_sec); - switch (macintosh_config->adb_type) { case MAC_ADB_IOP: case MAC_ADB_II: case MAC_ADB_PB1: - via_write_time(now); + via_set_rtc_time(t); break; #ifdef CONFIG_ADB_CUDA case MAC_ADB_EGRET: case MAC_ADB_CUDA: - cuda_write_time(now); + cuda_set_rtc_time(t); break; #endif #ifdef CONFIG_ADB_PMU case MAC_ADB_PB2: - pmu_write_time(now); + pmu_set_rtc_time(t); break; #endif default: diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 4f3ab5707265..0823d291fbeb 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -65,9 +65,7 @@ boot := arch/microblaze/boot # Are we making a simpleImage.<boardname> target? If so, crack out the boardname DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS))) -ifneq ($(DTB),) - core-y += $(boot)/dts/ -endif +core-y += $(boot)/dts/ # defines filename extension depending memory management type ifeq ($(CONFIG_MMU),) diff --git a/arch/microblaze/boot/dts/Makefile b/arch/microblaze/boot/dts/Makefile index 1f77913d404d..c7324e74f9ef 100644 --- a/arch/microblaze/boot/dts/Makefile +++ b/arch/microblaze/boot/dts/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # +dtb-y := system.dtb + +ifneq ($(DTB),) obj-y += linked_dtb.o # Ensure system.dtb exists @@ -11,6 +14,7 @@ ifneq ($(DTB),system) $(obj)/system.dtb: $(obj)/$(DTB).dtb $(call if_changed,cp) endif +endif quiet_cmd_cp = CP $< $@$2 cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c index 96b3f26d16be..ef2f49471a2a 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/arch/microblaze/kernel/cpu/cpuinfo.c @@ -89,9 +89,9 @@ static struct device_node *cpu; void __init setup_cpuinfo(void) { - cpu = (struct device_node *) of_find_node_by_type(NULL, "cpu"); + cpu = of_get_cpu_node(0, NULL); if (!cpu) - pr_err("You don't have cpu!!!\n"); + pr_err("You don't have cpu or are missing cpu reg property!!!\n"); pr_info("%s: initialising\n", __func__); @@ -117,6 +117,8 @@ void __init setup_cpuinfo(void) if (cpuinfo.mmu_privins) pr_warn("%s: Stream instructions enabled" " - USERSPACE CAN LOCK THIS KERNEL!\n", __func__); + + of_node_put(cpu); } void __init setup_cpuinfo_clk(void) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 77c022e56e6e..53e75ddbba1c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -21,6 +21,7 @@ config MIPS select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_LIB_ASHLDI3 @@ -28,7 +29,6 @@ config MIPS select GENERIC_LIB_CMPDI2 select GENERIC_LIB_LSHRDI3 select GENERIC_LIB_UCMPDI2 - select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL @@ -78,6 +78,7 @@ config MIPS select RTC_LIB if !MACH_LOONGSON64 select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS + select NO_BOOTMEM menu "Machine selection" @@ -132,6 +133,7 @@ config MIPS_GENERIC select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USE_OF + select UHI_BOOT help Select this to build a kernel which aims to support multiple boards, generally using a flattened device tree passed from the bootloader @@ -1149,6 +1151,7 @@ config NO_IOPORT_MAP config GENERIC_CSUM bool + default y if !CPU_HAS_LOAD_STORE_LR config GENERIC_ISA_DMA bool @@ -1367,6 +1370,7 @@ config CPU_LOONGSON3 select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES + select CPU_HAS_LOAD_STORE_LR select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_PGD_C0_CONTEXT @@ -1443,6 +1447,7 @@ config CPU_MIPS32_R1 bool "MIPS32 Release 1" depends on SYS_HAS_CPU_MIPS32_R1 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM help @@ -1460,6 +1465,7 @@ config CPU_MIPS32_R2 bool "MIPS32 Release 2" depends on SYS_HAS_CPU_MIPS32_R2 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA @@ -1478,7 +1484,6 @@ config CPU_MIPS32_R6 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA - select GENERIC_CSUM select HAVE_KVM select MIPS_O32_FP64_SUPPORT help @@ -1491,6 +1496,7 @@ config CPU_MIPS64_R1 bool "MIPS64 Release 1" depends on SYS_HAS_CPU_MIPS64_R1 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1510,6 +1516,7 @@ config CPU_MIPS64_R2 bool "MIPS64 Release 2" depends on SYS_HAS_CPU_MIPS64_R2 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1531,7 +1538,6 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA - select GENERIC_CSUM select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32 select HAVE_KVM help @@ -1544,6 +1550,7 @@ config CPU_R3000 bool "R3000" depends on SYS_HAS_CPU_R3000 select CPU_HAS_WB + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM help @@ -1558,12 +1565,14 @@ config CPU_TX39XX bool "R39XX" depends on SYS_HAS_CPU_TX39XX select CPU_SUPPORTS_32BIT_KERNEL + select CPU_HAS_LOAD_STORE_LR config CPU_VR41XX bool "R41xx" depends on SYS_HAS_CPU_VR41XX select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL + select CPU_HAS_LOAD_STORE_LR help The options selects support for the NEC VR4100 series of processors. Only choose this option if you have one of these processors as a @@ -1575,6 +1584,7 @@ config CPU_R4300 depends on SYS_HAS_CPU_R4300 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL + select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R4300-series processors. @@ -1584,6 +1594,7 @@ config CPU_R4X00 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES + select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R4000-series processors other than 4300, including the R4000, R4400, R4600, and 4700. @@ -1592,6 +1603,7 @@ config CPU_TX49XX bool "R49XX" depends on SYS_HAS_CPU_TX49XX select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES @@ -1602,6 +1614,7 @@ config CPU_R5000 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES + select CPU_HAS_LOAD_STORE_LR help MIPS Technologies R5000-series processors other than the Nevada. @@ -1611,6 +1624,7 @@ config CPU_R5432 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES + select CPU_HAS_LOAD_STORE_LR config CPU_R5500 bool "R5500" @@ -1618,6 +1632,7 @@ config CPU_R5500 select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES + select CPU_HAS_LOAD_STORE_LR help NEC VR5500 and VR5500A series processors implement 64-bit MIPS IV instruction set. @@ -1628,6 +1643,7 @@ config CPU_NEVADA select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HUGEPAGES + select CPU_HAS_LOAD_STORE_LR help QED / PMC-Sierra RM52xx-series ("Nevada") processors. @@ -1635,6 +1651,7 @@ config CPU_R8000 bool "R8000" depends on SYS_HAS_CPU_R8000 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_64BIT_KERNEL help MIPS Technologies R8000 processors. Note these processors are @@ -1644,6 +1661,7 @@ config CPU_R10000 bool "R10000" depends on SYS_HAS_CPU_R10000 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1655,6 +1673,7 @@ config CPU_RM7000 bool "RM7000" depends on SYS_HAS_CPU_RM7000 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1663,6 +1682,7 @@ config CPU_RM7000 config CPU_SB1 bool "SB1" depends on SYS_HAS_CPU_SB1 + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1673,6 +1693,7 @@ config CPU_CAVIUM_OCTEON bool "Cavium Octeon processor" depends on SYS_HAS_CPU_CAVIUM_OCTEON select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_64BIT_KERNEL select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM @@ -1702,6 +1723,7 @@ config CPU_BMIPS select WEAK_ORDERING select CPU_SUPPORTS_HIGHMEM select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_CPUFREQ select MIPS_EXTERNAL_TIMER help @@ -1710,6 +1732,7 @@ config CPU_BMIPS config CPU_XLR bool "Netlogic XLR SoC" depends on SYS_HAS_CPU_XLR + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_64BIT_KERNEL select CPU_SUPPORTS_HIGHMEM @@ -1728,6 +1751,7 @@ config CPU_XLP select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_MIPSR2 select CPU_SUPPORTS_HUGEPAGES select MIPS_ASID_BITS_VARIABLE @@ -1833,12 +1857,14 @@ config CPU_LOONGSON2 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_HUGEPAGES select ARCH_HAS_PHYS_TO_DMA + select CPU_HAS_LOAD_STORE_LR config CPU_LOONGSON1 bool select CPU_MIPS32 select CPU_MIPSR1 select CPU_HAS_PREFETCH + select CPU_HAS_LOAD_STORE_LR select CPU_SUPPORTS_32BIT_KERNEL select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_CPUFREQ @@ -2452,6 +2478,13 @@ config XKS01 config CPU_HAS_RIXI bool +config CPU_HAS_LOAD_STORE_LR + bool + help + CPU has support for unaligned load and store instructions: + LWL, LWR, SWL, SWR (Load/store word left/right). + LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems). + # # Vectored interrupt mode is an R2 feature # @@ -2899,6 +2932,9 @@ config USE_OF select OF_EARLY_FLATTREE select IRQ_DOMAIN +config UHI_BOOT + bool + config BUILTIN_DTB bool diff --git a/arch/mips/Makefile b/arch/mips/Makefile index d74b3742fa5d..15a84cfd0719 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -13,6 +13,7 @@ # archscripts: scripts_basic + $(Q)$(MAKE) $(build)=arch/mips/tools elf-entry $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig @@ -230,6 +231,8 @@ toolchain-xpa := $(call cc-option-yn,$(xpa-cflags-y) -mxpa) cflags-$(toolchain-xpa) += -DTOOLCHAIN_SUPPORTS_XPA toolchain-crc := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc) cflags-$(toolchain-crc) += -DTOOLCHAIN_SUPPORTS_CRC +toolchain-dsp := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mdsp) +cflags-$(toolchain-dsp) += -DTOOLCHAIN_SUPPORTS_DSP # # Firmware support @@ -257,13 +260,7 @@ ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif -# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number. -entry-y = $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \ - | sed -n '/^start address / { \ - s/^.* //; \ - s/0x\([0-7].......\)$$/0x00000000\1/; \ - s/0x\(........\)$$/0xffffffff\1/; p }') - +entry-y = $(shell $(objtree)/arch/mips/tools/elf-entry vmlinux) cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ @@ -407,18 +404,7 @@ endif CLEAN_FILES += vmlinux.32 vmlinux.64 # device-trees -core-$(CONFIG_BUILTIN_DTB) += arch/mips/boot/dts/ - -%.dtb %.dtb.S %.dtb.o: | scripts - $(Q)$(MAKE) $(build)=arch/mips/boot/dts arch/mips/boot/dts/$@ - -PHONY += dtbs -dtbs: scripts - $(Q)$(MAKE) $(build)=arch/mips/boot/dts - -PHONY += dtbs_install -dtbs_install: - $(Q)$(MAKE) $(dtbinst)=arch/mips/boot/dts +core-y += arch/mips/boot/dts/ archprepare: ifdef CONFIG_MIPS32_N32 @@ -461,8 +447,6 @@ define archhelp echo ' uImage.lzma - U-Boot image (lzma)' echo ' uImage.lzo - U-Boot image (lzo)' echo ' uzImage.bin - U-Boot image (self-extracting)' - echo ' dtbs - Device-tree blobs for enabled boards' - echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)' echo echo ' These will be default as appropriate for a configured platform.' echo diff --git a/arch/mips/bcm47xx/workarounds.c b/arch/mips/bcm47xx/workarounds.c index 1a8a07e7a563..46eddbec8d9f 100644 --- a/arch/mips/bcm47xx/workarounds.c +++ b/arch/mips/bcm47xx/workarounds.c @@ -5,9 +5,8 @@ #include <bcm47xx_board.h> #include <bcm47xx.h> -static void __init bcm47xx_workarounds_netgear_wnr3500l(void) +static void __init bcm47xx_workarounds_enable_usb_power(int usb_power) { - const int usb_power = 12; int err; err = gpio_request_one(usb_power, GPIOF_OUT_INIT_HIGH, "usb_power"); @@ -23,7 +22,10 @@ void __init bcm47xx_workarounds(void) switch (board) { case BCM47XX_BOARD_NETGEAR_WNR3500L: - bcm47xx_workarounds_netgear_wnr3500l(); + bcm47xx_workarounds_enable_usb_power(12); + break; + case BCM47XX_BOARD_NETGEAR_WNDR3400_V3: + bcm47xx_workarounds_enable_usb_power(21); break; default: /* No workaround(s) needed */ diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 231fc5ce375e..6329c5f780d6 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -153,8 +153,6 @@ void __init plat_time_init(void) mips_hpt_frequency = freq; } -extern const char __appended_dtb; - void __init plat_mem_setup(void) { void *dtb; @@ -164,15 +162,10 @@ void __init plat_mem_setup(void) ioport_resource.start = 0; ioport_resource.end = ~0; -#ifdef CONFIG_MIPS_ELF_APPENDED_DTB - if (!fdt_check_header(&__appended_dtb)) - dtb = (void *)&__appended_dtb; - else -#endif /* intended to somewhat resemble ARM; see Documentation/arm/Booting */ if (fw_arg0 == 0 && fw_arg1 == 0xffffffff) dtb = phys_to_virt(fw_arg2); - else if (fw_passed_dtb) /* UHI interface */ + else if (fw_passed_dtb) /* UHI interface or appended dtb */ dtb = (void *)fw_passed_dtb; else if (__dtb_start != __dtb_end) dtb = (void *)__dtb_start; diff --git a/arch/mips/boot/dts/lantiq/danube.dtsi b/arch/mips/boot/dts/lantiq/danube.dtsi index 2dd950181f8a..510be63c8bdf 100644 --- a/arch/mips/boot/dts/lantiq/danube.dtsi +++ b/arch/mips/boot/dts/lantiq/danube.dtsi @@ -10,12 +10,12 @@ }; }; - biu@1F800000 { + biu@1f800000 { #address-cells = <1>; #size-cells = <1>; compatible = "lantiq,biu", "simple-bus"; - reg = <0x1F800000 0x800000>; - ranges = <0x0 0x1F800000 0x7FFFFF>; + reg = <0x1f800000 0x800000>; + ranges = <0x0 0x1f800000 0x7fffff>; icu0: icu@80200 { #interrupt-cells = <1>; @@ -24,18 +24,18 @@ reg = <0x80200 0x120>; }; - watchdog@803F0 { + watchdog@803f0 { compatible = "lantiq,wdt"; - reg = <0x803F0 0x10>; + reg = <0x803f0 0x10>; }; }; - sram@1F000000 { + sram@1f000000 { #address-cells = <1>; #size-cells = <1>; compatible = "lantiq,sram"; - reg = <0x1F000000 0x800000>; - ranges = <0x0 0x1F000000 0x7FFFFF>; + reg = <0x1f000000 0x800000>; + ranges = <0x0 0x1f000000 0x7fffff>; eiu0: eiu@101000 { #interrupt-cells = <1>; @@ -66,41 +66,41 @@ #address-cells = <1>; #size-cells = <1>; compatible = "lantiq,fpi", "simple-bus"; - ranges = <0x0 0x10000000 0xEEFFFFF>; - reg = <0x10000000 0xEF00000>; + ranges = <0x0 0x10000000 0xeefffff>; + reg = <0x10000000 0xef00000>; - gptu@E100A00 { + gptu@e100a00 { compatible = "lantiq,gptu-xway"; - reg = <0xE100A00 0x100>; + reg = <0xe100a00 0x100>; }; - serial@E100C00 { + serial@e100c00 { compatible = "lantiq,asc"; - reg = <0xE100C00 0x400>; + reg = <0xe100c00 0x400>; interrupt-parent = <&icu0>; interrupts = <112 113 114>; }; - dma0: dma@E104100 { + dma0: dma@e104100 { compatible = "lantiq,dma-xway"; - reg = <0xE104100 0x800>; + reg = <0xe104100 0x800>; }; - ebu0: ebu@E105300 { + ebu0: ebu@e105300 { compatible = "lantiq,ebu-xway"; - reg = <0xE105300 0x100>; + reg = <0xe105300 0x100>; }; - pci0: pci@E105400 { + pci0: pci@e105400 { #address-cells = <3>; #size-cells = <2>; #interrupt-cells = <1>; compatible = "lantiq,pci-xway"; bus-range = <0x0 0x0>; ranges = <0x2000000 0 0x8000000 0x8000000 0 0x2000000 /* pci memory */ - 0x1000000 0 0x00000000 0xAE00000 0 0x200000>; /* io space */ + 0x1000000 0 0x00000000 0xae00000 0 0x200000>; /* io space */ reg = <0x7000000 0x8000 /* config space */ - 0xE105400 0x400>; /* pci bridge */ + 0xe105400 0x400>; /* pci bridge */ }; }; }; diff --git a/arch/mips/boot/dts/lantiq/easy50712.dts b/arch/mips/boot/dts/lantiq/easy50712.dts index c37a33962f28..1ce20b7d05cb 100644 --- a/arch/mips/boot/dts/lantiq/easy50712.dts +++ b/arch/mips/boot/dts/lantiq/easy50712.dts @@ -52,14 +52,14 @@ }; }; - gpio: pinmux@E100B10 { + gpio: pinmux@e100b10 { compatible = "lantiq,danube-pinctrl"; pinctrl-names = "default"; pinctrl-0 = <&state_default>; #gpio-cells = <2>; gpio-controller; - reg = <0xE100B10 0xA0>; + reg = <0xe100b10 0xa0>; state_default: pinmux { stp { @@ -82,26 +82,26 @@ }; }; - etop@E180000 { + etop@e180000 { compatible = "lantiq,etop-xway"; - reg = <0xE180000 0x40000>; + reg = <0xe180000 0x40000>; interrupt-parent = <&icu0>; interrupts = <73 78>; phy-mode = "rmii"; mac-address = [ 00 11 22 33 44 55 ]; }; - stp0: stp@E100BB0 { + stp0: stp@e100bb0 { #gpio-cells = <2>; compatible = "lantiq,gpio-stp-xway"; gpio-controller; - reg = <0xE100BB0 0x40>; + reg = <0xe100bb0 0x40>; lantiq,shadow = <0xfff>; lantiq,groups = <0x3>; }; - pci@E105400 { + pci@e105400 { lantiq,bus-clock = <33333333>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile index 9a9bb7ea0503..ec6f5b2bf093 100644 --- a/arch/mips/boot/dts/mscc/Makefile +++ b/arch/mips/boot/dts/mscc/Makefile @@ -1,3 +1,3 @@ -dtb-$(CONFIG_MSCC_OCELOT) += ocelot_pcb123.dtb +dtb-$(CONFIG_MSCC_OCELOT) += ocelot_pcb123.dtb ocelot_pcb120.dtb obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 8ce317c5b9ed..90c60d42f571 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -78,6 +78,19 @@ status = "disabled"; }; + i2c: i2c@100400 { + compatible = "mscc,ocelot-i2c", "snps,designware-i2c"; + pinctrl-0 = <&i2c_pins>; + pinctrl-names = "default"; + reg = <0x100400 0x100>, <0x198 0x8>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <8>; + clocks = <&ahb_clk>; + + status = "disabled"; + }; + uart2: serial@100800 { pinctrl-0 = <&uart2_pins>; pinctrl-names = "default"; @@ -182,6 +195,11 @@ interrupts = <13>; #interrupt-cells = <2>; + i2c_pins: i2c-pins { + pins = "GPIO_16", "GPIO_17"; + function = "twi"; + }; + uart_pins: uart-pins { pins = "GPIO_6", "GPIO_7"; function = "uart"; @@ -196,6 +214,7 @@ pins = "GPIO_14", "GPIO_15"; function = "miim1"; }; + }; mdio0: mdio@107009c { diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts new file mode 100644 index 000000000000..33991fd209f5 --- /dev/null +++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2017 Microsemi Corporation */ + +/dts-v1/; + +#include <dt-bindings/interrupt-controller/irq.h> +#include <dt-bindings/phy/phy-ocelot-serdes.h> +#include "ocelot.dtsi" + +/ { + compatible = "mscc,ocelot-pcb120", "mscc,ocelot"; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + memory@0 { + device_type = "memory"; + reg = <0x0 0x0e000000>; + }; +}; + +&gpio { + phy_int_pins: phy_int_pins { + pins = "GPIO_4"; + function = "gpio"; + }; +}; + +&mdio0 { + status = "okay"; +}; + +&mdio1 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&miim1>, <&phy_int_pins>; + + phy7: ethernet-phy@0 { + reg = <0>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gpio>; + }; + phy6: ethernet-phy@1 { + reg = <1>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gpio>; + }; + phy5: ethernet-phy@2 { + reg = <2>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gpio>; + }; + phy4: ethernet-phy@3 { + reg = <3>; + interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&gpio>; + }; +}; + +&port0 { + phy-handle = <&phy0>; +}; + +&port1 { + phy-handle = <&phy1>; +}; + +&port2 { + phy-handle = <&phy2>; +}; + +&port3 { + phy-handle = <&phy3>; +}; + +&port4 { + phy-handle = <&phy7>; + phy-mode = "sgmii"; + phys = <&serdes 4 SERDES1G(2)>; +}; + +&port5 { + phy-handle = <&phy4>; + phy-mode = "sgmii"; + phys = <&serdes 5 SERDES1G(5)>; +}; + +&port6 { + phy-handle = <&phy6>; + phy-mode = "sgmii"; + phys = <&serdes 6 SERDES1G(3)>; +}; + +&port9 { + phy-handle = <&phy5>; + phy-mode = "sgmii"; + phys = <&serdes 9 SERDES1G(4)>; +}; + +&uart0 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts index 2266027759f9..ef852f382da8 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts @@ -36,6 +36,12 @@ }; }; +&i2c { + clock-frequency = <100000>; + i2c-sda-hold-time-ns = <300>; + status = "okay"; +}; + &mdio0 { status = "okay"; }; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 8272d8c648ca..cc1d8525e651 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1180,8 +1180,8 @@ static int octeon_irq_gpio_xlat(struct irq_domain *d, type = IRQ_TYPE_LEVEL_LOW; break; default: - pr_err("Error: (%s) Invalid irq trigger specification: %x\n", - node->name, + pr_err("Error: (%pOFn) Invalid irq trigger specification: %x\n", + node, trigger); type = IRQ_TYPE_LEVEL_LOW; break; @@ -2271,8 +2271,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, parent_irq = irq_of_parse_and_map(ciu_node, 0); if (!parent_irq) { - pr_err("ERROR: Couldn't acquire parent_irq for %s\n", - ciu_node->name); + pr_err("ERROR: Couldn't acquire parent_irq for %pOFn\n", + ciu_node); return -EINVAL; } @@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, addr = of_get_address(ciu_node, 0, NULL, NULL); if (!addr) { - pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name); + pr_err("ERROR: Couldn't acquire reg(0) %pOFn\n", ciu_node); return -EINVAL; } host_data->raw_reg = (u64)phys_to_virt( @@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, addr = of_get_address(ciu_node, 1, NULL, NULL); if (!addr) { - pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name); + pr_err("ERROR: Couldn't acquire reg(1) %pOFn\n", ciu_node); return -EINVAL; } host_data->en_reg = (u64)phys_to_virt( @@ -2299,8 +2299,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, r = of_property_read_u32(ciu_node, "cavium,max-bits", &val); if (r) { - pr_err("ERROR: Couldn't read cavium,max-bits from %s\n", - ciu_node->name); + pr_err("ERROR: Couldn't read cavium,max-bits from %pOFn\n", + ciu_node); return r; } host_data->max_bits = val; diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index c2426232db06..dfb95cffef3e 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -1161,15 +1161,12 @@ void __init device_tree_init(void) bool do_prune; bool fill_mac; -#ifdef CONFIG_MIPS_ELF_APPENDED_DTB - if (!fdt_check_header(&__appended_dtb)) { - fdt = &__appended_dtb; + if (fw_passed_dtb) { + fdt = (void *)fw_passed_dtb; do_prune = false; fill_mac = true; pr_info("Using appended Device Tree.\n"); - } else -#endif - if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) { + } else if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) { fdt = phys_to_virt(octeon_bootinfo->fdt_addr); if (fdt_check_header(fdt)) panic("Corrupt Device Tree passed to kernel."); diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 75e7c8625659..39f2a2ec1286 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -15,6 +15,7 @@ #include <linux/sched/task_stack.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/kexec.h> #include <asm/mmu_context.h> #include <asm/time.h> @@ -424,6 +425,9 @@ const struct plat_smp_ops octeon_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif +#ifdef CONFIG_KEXEC + .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, +#endif }; static irqreturn_t octeon_78xx_reched_interrupt(int irq, void *dev_id) @@ -501,6 +505,9 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif +#ifdef CONFIG_KEXEC + .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, +#endif }; void __init octeon_setup_smp(void) diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index aa815761d85e..f607888d2483 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -18,17 +18,25 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_GPIO_SYSFS=y +CONFIG_NETDEVICES=y +CONFIG_MSCC_OCELOT_SWITCH=y +CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y +CONFIG_MDIO_MSCC_MIIM=y +CONFIG_MICROSEMI_PHY=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MUX=y +CONFIG_I2C_DESIGNWARE_PLATFORM=y CONFIG_SPI=y CONFIG_SPI_BITBANG=y CONFIG_SPI_DESIGNWARE=y +CONFIG_SPI_DW_MMIO=y CONFIG_SPI_SPIDEV=y +CONFIG_GPIO_SYSFS=y + CONFIG_POWER_RESET=y CONFIG_POWER_RESET_OCELOT_RESET=y diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig index 08e33c6b2539..fd6019802657 100644 --- a/arch/mips/generic/Kconfig +++ b/arch/mips/generic/Kconfig @@ -65,11 +65,11 @@ config FIT_IMAGE_FDT_XILFPGA Enable this to include the FDT for the MIPSfpga platform from Imagination Technologies in the FIT kernel image. -config FIT_IMAGE_FDT_OCELOT_PCB123 - bool "Include FDT for Microsemi Ocelot PCB123" +config FIT_IMAGE_FDT_OCELOT + bool "Include FDT for Microsemi Ocelot development platforms" select MSCC_OCELOT help - Enable this to include the FDT for the Ocelot PCB123 platform + Enable this to include the FDT for the Ocelot development platforms from Microsemi in the FIT kernel image. This requires u-boot on the platform. diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile index d03a36f869a4..181aa1335419 100644 --- a/arch/mips/generic/Makefile +++ b/arch/mips/generic/Makefile @@ -15,5 +15,4 @@ obj-y += proc.o obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o obj-$(CONFIG_LEGACY_BOARD_OCELOT) += board-ocelot.o -obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index 879cb80396c8..eaa19d189324 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -16,5 +16,5 @@ all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb its-y := vmlinux.its.S its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S -its-$(CONFIG_FIT_IMAGE_FDT_OCELOT_PCB123) += board-ocelot_pcb123.its.S +its-$(CONFIG_FIT_IMAGE_FDT_OCELOT) += board-ocelot.its.S its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S diff --git a/arch/mips/generic/board-ocelot_pcb123.its.S b/arch/mips/generic/board-ocelot.its.S index 5a7d5e1c878a..3da23988149a 100644 --- a/arch/mips/generic/board-ocelot_pcb123.its.S +++ b/arch/mips/generic/board-ocelot.its.S @@ -11,6 +11,17 @@ algo = "sha1"; }; }; + + fdt@ocelot_pcb120 { + description = "MSCC Ocelot PCB120 Device Tree"; + data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash@0 { + algo = "sha1"; + }; + }; }; configurations { @@ -19,5 +30,11 @@ kernel = "kernel@0"; fdt = "fdt@ocelot_pcb123"; }; + + conf@ocelot_pcb120 { + description = "Ocelot Linux kernel"; + kernel = "kernel@0"; + fdt = "fdt@ocelot_pcb120"; + }; }; }; diff --git a/arch/mips/generic/kexec.c b/arch/mips/generic/kexec.c deleted file mode 100644 index 1ca409f58929..000000000000 --- a/arch/mips/generic/kexec.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2016 Imagination Technologies - * Author: Marcin Nowakowski <marcin.nowakowski@mips.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kexec.h> -#include <linux/libfdt.h> -#include <linux/uaccess.h> - -static int generic_kexec_prepare(struct kimage *image) -{ - int i; - - for (i = 0; i < image->nr_segments; i++) { - struct fdt_header fdt; - - if (image->segment[i].memsz <= sizeof(fdt)) - continue; - - if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) - continue; - - if (fdt_check_header(&fdt)) - continue; - - kexec_args[0] = -2; - kexec_args[1] = (unsigned long) - phys_to_virt((unsigned long)image->segment[i].mem); - break; - } - return 0; -} - -static int __init register_generic_kexec(void) -{ - _machine_kexec_prepare = generic_kexec_prepare; - return 0; -} -arch_initcall(register_generic_kexec); diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h index 1e38f0e1ea3e..d80be38c4144 100644 --- a/arch/mips/include/asm/asm-eva.h +++ b/arch/mips/include/asm/asm-eva.h @@ -15,6 +15,7 @@ /* Kernel variants */ #define kernel_cache(op, base) "cache " op ", " base "\n" +#define kernel_pref(hint, base) "pref " hint ", " base "\n" #define kernel_ll(reg, addr) "ll " reg ", " addr "\n" #define kernel_sc(reg, addr) "sc " reg ", " addr "\n" #define kernel_lw(reg, addr) "lw " reg ", " addr "\n" @@ -51,6 +52,7 @@ " .set pop\n" #define user_cache(op, base) __BUILD_EVA_INSN("cachee", op, base) +#define user_pref(hint, base) __BUILD_EVA_INSN("prefe", hint, base) #define user_ll(reg, addr) __BUILD_EVA_INSN("lle", reg, addr) #define user_sc(reg, addr) __BUILD_EVA_INSN("sce", reg, addr) #define user_lw(reg, addr) __BUILD_EVA_INSN("lwe", reg, addr) @@ -72,6 +74,7 @@ #else #define user_cache(op, base) kernel_cache(op, base) +#define user_pref(hint, base) kernel_pref(hint, base) #define user_ll(reg, addr) kernel_ll(reg, addr) #define user_sc(reg, addr) kernel_sc(reg, addr) #define user_lw(reg, addr) kernel_lw(reg, addr) @@ -99,6 +102,7 @@ #else /* __ASSEMBLY__ */ #define kernel_cache(op, base) cache op, base +#define kernel_pref(hint, base) pref hint, base #define kernel_ll(reg, addr) ll reg, addr #define kernel_sc(reg, addr) sc reg, addr #define kernel_lw(reg, addr) lw reg, addr @@ -135,6 +139,7 @@ .set pop; #define user_cache(op, base) __BUILD_EVA_INSN(cachee, op, base) +#define user_pref(hint, base) __BUILD_EVA_INSN(prefe, hint, base) #define user_ll(reg, addr) __BUILD_EVA_INSN(lle, reg, addr) #define user_sc(reg, addr) __BUILD_EVA_INSN(sce, reg, addr) #define user_lw(reg, addr) __BUILD_EVA_INSN(lwe, reg, addr) @@ -155,6 +160,7 @@ #else #define user_cache(op, base) kernel_cache(op, base) +#define user_pref(hint, base) kernel_pref(hint, base) #define user_ll(reg, addr) kernel_ll(reg, addr) #define user_sc(reg, addr) kernel_sc(reg, addr) #define user_lw(reg, addr) kernel_lw(reg, addr) diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 81fae23ce7cd..c23527ba65d0 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -20,32 +20,6 @@ #include <asm/sgidefs.h> #include <asm/asm-eva.h> -#ifndef CAT -#ifdef __STDC__ -#define __CAT(str1, str2) str1##str2 -#else -#define __CAT(str1, str2) str1/**/str2 -#endif -#define CAT(str1, str2) __CAT(str1, str2) -#endif - -/* - * PIC specific declarations - * Not used for the kernel but here seems to be the right place. - */ -#ifdef __PIC__ -#define CPRESTORE(register) \ - .cprestore register -#define CPADD(register) \ - .cpadd register -#define CPLOAD(register) \ - .cpload register -#else -#define CPRESTORE(register) -#define CPADD(register) -#define CPLOAD(register) -#endif - /* * LEAF - declare leaf routine */ @@ -130,96 +104,6 @@ symbol = value .popsection; /* - * Build text tables - */ -#define TTABLE(string) \ - .pushsection .text; \ - .word 1f; \ - .popsection \ - .pushsection .data; \ -1: .asciiz string; \ - .popsection - -/* - * MIPS IV pref instruction. - * Use with .set noreorder only! - * - * MIPS IV implementations are free to treat this as a nop. The R5000 - * is one of them. So we should have an option not to use this instruction. - */ -#ifdef CONFIG_CPU_HAS_PREFETCH - -#define PREF(hint,addr) \ - .set push; \ - .set arch=r5000; \ - pref hint, addr; \ - .set pop - -#define PREFE(hint, addr) \ - .set push; \ - .set mips0; \ - .set eva; \ - prefe hint, addr; \ - .set pop - -#define PREFX(hint,addr) \ - .set push; \ - .set arch=r5000; \ - prefx hint, addr; \ - .set pop - -#else /* !CONFIG_CPU_HAS_PREFETCH */ - -#define PREF(hint, addr) -#define PREFE(hint, addr) -#define PREFX(hint, addr) - -#endif /* !CONFIG_CPU_HAS_PREFETCH */ - -/* - * MIPS ISA IV/V movn/movz instructions and equivalents for older CPUs. - */ -#if (_MIPS_ISA == _MIPS_ISA_MIPS1) -#define MOVN(rd, rs, rt) \ - .set push; \ - .set reorder; \ - beqz rt, 9f; \ - move rd, rs; \ - .set pop; \ -9: -#define MOVZ(rd, rs, rt) \ - .set push; \ - .set reorder; \ - bnez rt, 9f; \ - move rd, rs; \ - .set pop; \ -9: -#endif /* _MIPS_ISA == _MIPS_ISA_MIPS1 */ -#if (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS3) -#define MOVN(rd, rs, rt) \ - .set push; \ - .set noreorder; \ - bnezl rt, 9f; \ - move rd, rs; \ - .set pop; \ -9: -#define MOVZ(rd, rs, rt) \ - .set push; \ - .set noreorder; \ - beqzl rt, 9f; \ - move rd, rs; \ - .set pop; \ -9: -#endif /* (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS3) */ -#if (_MIPS_ISA == _MIPS_ISA_MIPS4 ) || (_MIPS_ISA == _MIPS_ISA_MIPS5) || \ - (_MIPS_ISA == _MIPS_ISA_MIPS32) || (_MIPS_ISA == _MIPS_ISA_MIPS64) -#define MOVN(rd, rs, rt) \ - movn rd, rs, rt -#define MOVZ(rd, rs, rt) \ - movz rd, rs, rt -#endif /* MIPS IV, MIPS V, MIPS32 or MIPS64 */ - -/* * Stack alignment */ #if (_MIPS_SIM == _MIPS_SIM_ABI32) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 54c730aed327..266257d56fb6 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -20,6 +20,7 @@ #include <linux/irqflags.h> #include <asm/addrspace.h> +#include <asm/barrier.h> #include <asm/bug.h> #include <asm/byteorder.h> #include <asm/cpu.h> @@ -34,11 +35,6 @@ #include <mangle-port.h> /* - * Slowdown I/O port space accesses for antique hardware. - */ -#undef CONF_SLOWDOWN_IO - -/* * Raw operations are never swapped in software. OTOH values that raw * operations are working on may or may not have been swapped by the bus * hardware. An example use would be for flash memory that's used for @@ -50,6 +46,11 @@ # define __raw_ioswabq(a, x) (x) # define ____raw_ioswabq(a, x) (x) +# define __relaxed_ioswabb ioswabb +# define __relaxed_ioswabw ioswabw +# define __relaxed_ioswabl ioswabl +# define __relaxed_ioswabq ioswabq + /* ioswab[bwlq], __mem_ioswab[bwlq] are defined in mangle-port.h */ #define IO_SPACE_LIMIT 0xffff @@ -80,31 +81,29 @@ static inline void set_io_port_base(unsigned long base) } /* - * Thanks to James van Artsdalen for a better timing-fix than - * the two short jumps: using outb's to a nonexistent port seems - * to guarantee better timings even on fast machines. - * - * On the other hand, I'd like to be sure of a non-existent port: - * I feel a bit unsafe about using 0x80 (should be safe, though) - * - * Linus - * + * Provide the necessary definitions for generic iomap. We make use of + * mips_io_port_base for iomap(), but we don't reserve any low addresses for + * use with I/O ports. */ -#define __SLOW_DOWN_IO \ - __asm__ __volatile__( \ - "sb\t$0,0x80(%0)" \ - : : "r" (mips_io_port_base)); +#define HAVE_ARCH_PIO_SIZE +#define PIO_OFFSET mips_io_port_base +#define PIO_MASK IO_SPACE_LIMIT +#define PIO_RESERVED 0x0UL -#ifdef CONF_SLOWDOWN_IO -#ifdef REALLY_SLOW_IO -#define SLOW_DOWN_IO { __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; } -#else -#define SLOW_DOWN_IO __SLOW_DOWN_IO -#endif -#else -#define SLOW_DOWN_IO -#endif +/* + * Enforce in-order execution of data I/O. In the MIPS architecture + * these are equivalent to corresponding platform-specific memory + * barriers defined in <asm/barrier.h>. API pinched from PowerPC, + * with sync additionally defined. + */ +#define iobarrier_rw() mb() +#define iobarrier_r() rmb() +#define iobarrier_w() wmb() +#define iobarrier_sync() iob() + +/* Some callers use this older API instead. */ +#define mmiowb() iobarrier_w() /* * virt_to_phys - map virtual addresses to physical @@ -172,11 +171,6 @@ static inline void *isa_bus_to_virt(unsigned long address) extern void __iomem * __ioremap(phys_addr_t offset, phys_addr_t size, unsigned long flags); extern void __iounmap(const volatile void __iomem *addr); -#ifndef CONFIG_PCI -struct pci_dev; -static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} -#endif - static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long size, unsigned long flags) { @@ -316,13 +310,13 @@ static inline void iounmap(const volatile void __iomem *addr) #undef __IS_KSEG1 } -#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT) +#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3) #define war_io_reorder_wmb() wmb() #else #define war_io_reorder_wmb() barrier() #endif -#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq) \ +#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, barrier, relax, irq) \ \ static inline void pfx##write##bwlq(type val, \ volatile void __iomem *mem) \ @@ -330,7 +324,10 @@ static inline void pfx##write##bwlq(type val, \ volatile type *__mem; \ type __val; \ \ - war_io_reorder_wmb(); \ + if (barrier) \ + iobarrier_rw(); \ + else \ + war_io_reorder_wmb(); \ \ __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ \ @@ -367,6 +364,9 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem) \ \ __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \ \ + if (barrier) \ + iobarrier_rw(); \ + \ if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \ __val = *__mem; \ else if (cpu_has_64bits) { \ @@ -390,18 +390,22 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem) \ } \ \ /* prevent prefetching of coherent DMA data prematurely */ \ - rmb(); \ + if (!relax) \ + rmb(); \ return pfx##ioswab##bwlq(__mem, __val); \ } -#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, p, slow) \ +#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, barrier, relax, p) \ \ static inline void pfx##out##bwlq##p(type val, unsigned long port) \ { \ volatile type *__addr; \ type __val; \ \ - war_io_reorder_wmb(); \ + if (barrier) \ + iobarrier_rw(); \ + else \ + war_io_reorder_wmb(); \ \ __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \ \ @@ -411,7 +415,6 @@ static inline void pfx##out##bwlq##p(type val, unsigned long port) \ BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long)); \ \ *__addr = __val; \ - slow; \ } \ \ static inline type pfx##in##bwlq##p(unsigned long port) \ @@ -423,23 +426,27 @@ static inline type pfx##in##bwlq##p(unsigned long port) \ \ BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long)); \ \ + if (barrier) \ + iobarrier_rw(); \ + \ __val = *__addr; \ - slow; \ \ /* prevent prefetching of coherent DMA data prematurely */ \ - rmb(); \ + if (!relax) \ + rmb(); \ return pfx##ioswab##bwlq(__addr, __val); \ } -#define __BUILD_MEMORY_PFX(bus, bwlq, type) \ +#define __BUILD_MEMORY_PFX(bus, bwlq, type, relax) \ \ -__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1) +__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1, relax, 1) #define BUILDIO_MEM(bwlq, type) \ \ -__BUILD_MEMORY_PFX(__raw_, bwlq, type) \ -__BUILD_MEMORY_PFX(, bwlq, type) \ -__BUILD_MEMORY_PFX(__mem_, bwlq, type) \ +__BUILD_MEMORY_PFX(__raw_, bwlq, type, 0) \ +__BUILD_MEMORY_PFX(__relaxed_, bwlq, type, 1) \ +__BUILD_MEMORY_PFX(__mem_, bwlq, type, 0) \ +__BUILD_MEMORY_PFX(, bwlq, type, 0) BUILDIO_MEM(b, u8) BUILDIO_MEM(w, u16) @@ -447,8 +454,8 @@ BUILDIO_MEM(l, u32) BUILDIO_MEM(q, u64) #define __BUILD_IOPORT_PFX(bus, bwlq, type) \ - __BUILD_IOPORT_SINGLE(bus, bwlq, type, ,) \ - __BUILD_IOPORT_SINGLE(bus, bwlq, type, _p, SLOW_DOWN_IO) + __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0,) \ + __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0, _p) #define BUILDIO_IOPORT(bwlq, type) \ __BUILD_IOPORT_PFX(, bwlq, type) \ @@ -463,19 +470,19 @@ BUILDIO_IOPORT(q, u64) #define __BUILDIO(bwlq, type) \ \ -__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 0) +__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 1, 0, 0) __BUILDIO(q, u64) -#define readb_relaxed readb -#define readw_relaxed readw -#define readl_relaxed readl -#define readq_relaxed readq +#define readb_relaxed __relaxed_readb +#define readw_relaxed __relaxed_readw +#define readl_relaxed __relaxed_readl +#define readq_relaxed __relaxed_readq -#define writeb_relaxed writeb -#define writew_relaxed writew -#define writel_relaxed writel -#define writeq_relaxed writeq +#define writeb_relaxed __relaxed_writeb +#define writew_relaxed __relaxed_writew +#define writel_relaxed __relaxed_writel +#define writeq_relaxed __relaxed_writeq #define readb_be(addr) \ __raw_readb((__force unsigned *)(addr)) @@ -561,14 +568,6 @@ BUILDSTRING(l, u32) BUILDSTRING(q, u64) #endif - -#ifdef CONFIG_CPU_CAVIUM_OCTEON -#define mmiowb() wmb() -#else -/* Depends on MIPS II instruction set */ -#define mmiowb() asm volatile ("sync" ::: "memory") -#endif - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { memset((void __force *) addr, val, count); diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index 493a3cc7c39a..40795ca89961 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -12,11 +12,11 @@ #include <asm/stacktrace.h> /* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (0x20000000) +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (0x20000000) +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) /* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000) +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) /* Reserve 3*4096 bytes for board-specific info */ #define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096) @@ -39,11 +39,12 @@ extern unsigned long kexec_args[4]; extern int (*_machine_kexec_prepare)(struct kimage *); extern void (*_machine_kexec_shutdown)(void); extern void (*_machine_crash_shutdown)(struct pt_regs *regs); -extern void default_machine_crash_shutdown(struct pt_regs *regs); +void default_machine_crash_shutdown(struct pt_regs *regs); +void kexec_nonboot_cpu_jump(void); +void kexec_reboot(void); #ifdef CONFIG_SMP extern const unsigned char kexec_smp_wait[]; extern unsigned long secondary_kexec_args[4]; -extern void (*relocated_kexec_smp_wait) (void *); extern atomic_t kexec_ready_to_reboot; extern void (*_crash_smp_send_stop)(void); #endif diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h index 3644b68c0ccc..be9f727a9328 100644 --- a/arch/mips/include/asm/mach-loongson64/irq.h +++ b/arch/mips/include/asm/mach-loongson64/irq.h @@ -10,7 +10,7 @@ #define MIPS_CPU_IRQ_BASE 56 #define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */ -#define LOONGSON_HT1_IRQ (MIPS_CPU_IRQ_BASE + 3) /* HT1 */ +#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */ #define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */ #define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index 312739117bb0..cbac603ced19 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -11,6 +11,8 @@ #ifndef __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H #define __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H +#include <asm/cpu.h> + /* * Override macros used in arch/mips/kernel/head.S. */ @@ -26,12 +28,15 @@ mfc0 t0, CP0_PAGEGRAIN or t0, (0x1 << 29) mtc0 t0, CP0_PAGEGRAIN -#ifdef CONFIG_LOONGSON3_ENHANCEMENT /* Enable STFill Buffer */ + mfc0 t0, CP0_PRID + andi t0, (PRID_IMP_MASK | PRID_REV_MASK) + slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2) + bnez t0, 1f mfc0 t0, CP0_CONFIG6 or t0, 0x100 mtc0 t0, CP0_CONFIG6 -#endif +1: _ehb .set pop #endif @@ -52,12 +57,15 @@ mfc0 t0, CP0_PAGEGRAIN or t0, (0x1 << 29) mtc0 t0, CP0_PAGEGRAIN -#ifdef CONFIG_LOONGSON3_ENHANCEMENT /* Enable STFill Buffer */ + mfc0 t0, CP0_PRID + andi t0, (PRID_IMP_MASK | PRID_REV_MASK) + slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2) + bnez t0, 1f mfc0 t0, CP0_CONFIG6 or t0, 0x100 mtc0 t0, CP0_CONFIG6 -#endif +1: _ehb .set pop #endif diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 01df9ad62fb8..341a02c92985 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -2287,13 +2287,14 @@ do { \ _write_32bit_cp1_register(dest, val, ) #endif -#ifdef HAVE_AS_DSP +#ifdef TOOLCHAIN_SUPPORTS_DSP #define rddsp(mask) \ ({ \ unsigned int __dspctl; \ \ __asm__ __volatile__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " rddsp %0, %x1 \n" \ " .set pop \n" \ @@ -2306,6 +2307,7 @@ do { \ do { \ __asm__ __volatile__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " wrdsp %0, %x1 \n" \ " .set pop \n" \ @@ -2318,6 +2320,7 @@ do { \ long mflo0; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mflo %0, $ac0 \n" \ " .set pop \n" \ @@ -2330,6 +2333,7 @@ do { \ long mflo1; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mflo %0, $ac1 \n" \ " .set pop \n" \ @@ -2342,6 +2346,7 @@ do { \ long mflo2; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mflo %0, $ac2 \n" \ " .set pop \n" \ @@ -2354,6 +2359,7 @@ do { \ long mflo3; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mflo %0, $ac3 \n" \ " .set pop \n" \ @@ -2366,6 +2372,7 @@ do { \ long mfhi0; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mfhi %0, $ac0 \n" \ " .set pop \n" \ @@ -2378,6 +2385,7 @@ do { \ long mfhi1; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mfhi %0, $ac1 \n" \ " .set pop \n" \ @@ -2390,6 +2398,7 @@ do { \ long mfhi2; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mfhi %0, $ac2 \n" \ " .set pop \n" \ @@ -2402,6 +2411,7 @@ do { \ long mfhi3; \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mfhi %0, $ac3 \n" \ " .set pop \n" \ @@ -2414,6 +2424,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mtlo %0, $ac0 \n" \ " .set pop \n" \ @@ -2425,6 +2436,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mtlo %0, $ac1 \n" \ " .set pop \n" \ @@ -2436,6 +2448,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mtlo %0, $ac2 \n" \ " .set pop \n" \ @@ -2447,6 +2460,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mtlo %0, $ac3 \n" \ " .set pop \n" \ @@ -2458,6 +2472,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mthi %0, $ac0 \n" \ " .set pop \n" \ @@ -2469,6 +2484,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mthi %0, $ac1 \n" \ " .set pop \n" \ @@ -2480,6 +2496,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mthi %0, $ac2 \n" \ " .set pop \n" \ @@ -2491,6 +2508,7 @@ do { \ ({ \ __asm__( \ " .set push \n" \ + " .set " MIPS_ISA_LEVEL " \n" \ " .set dsp \n" \ " mthi %0, $ac3 \n" \ " .set pop \n" \ diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 49d6046ca1d0..c373eb605040 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -81,7 +81,7 @@ extern unsigned int vced_count, vcei_count; #endif -#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M) extern unsigned long mips_stack_top(void); #define STACK_TOP mips_stack_top() diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 7f12d7e27c94..d19b2d65336b 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -48,58 +48,14 @@ extern void (*r4k_blast_icache)(void); : \ : "i" (op), "R" (*(unsigned char *)(addr))) -#ifdef CONFIG_MIPS_MT - -#define __iflush_prologue \ - unsigned long redundance; \ - extern int mt_n_iflushes; \ - for (redundance = 0; redundance < mt_n_iflushes; redundance++) { - -#define __iflush_epilogue \ - } - -#define __dflush_prologue \ - unsigned long redundance; \ - extern int mt_n_dflushes; \ - for (redundance = 0; redundance < mt_n_dflushes; redundance++) { - -#define __dflush_epilogue \ - } - -#define __inv_dflush_prologue __dflush_prologue -#define __inv_dflush_epilogue __dflush_epilogue -#define __sflush_prologue { -#define __sflush_epilogue } -#define __inv_sflush_prologue __sflush_prologue -#define __inv_sflush_epilogue __sflush_epilogue - -#else /* CONFIG_MIPS_MT */ - -#define __iflush_prologue { -#define __iflush_epilogue } -#define __dflush_prologue { -#define __dflush_epilogue } -#define __inv_dflush_prologue { -#define __inv_dflush_epilogue } -#define __sflush_prologue { -#define __sflush_epilogue } -#define __inv_sflush_prologue { -#define __inv_sflush_epilogue } - -#endif /* CONFIG_MIPS_MT */ - static inline void flush_icache_line_indexed(unsigned long addr) { - __iflush_prologue cache_op(Index_Invalidate_I, addr); - __iflush_epilogue } static inline void flush_dcache_line_indexed(unsigned long addr) { - __dflush_prologue cache_op(Index_Writeback_Inv_D, addr); - __dflush_epilogue } static inline void flush_scache_line_indexed(unsigned long addr) @@ -109,7 +65,6 @@ static inline void flush_scache_line_indexed(unsigned long addr) static inline void flush_icache_line(unsigned long addr) { - __iflush_prologue switch (boot_cpu_type()) { case CPU_LOONGSON2: cache_op(Hit_Invalidate_I_Loongson2, addr); @@ -119,21 +74,16 @@ static inline void flush_icache_line(unsigned long addr) cache_op(Hit_Invalidate_I, addr); break; } - __iflush_epilogue } static inline void flush_dcache_line(unsigned long addr) { - __dflush_prologue cache_op(Hit_Writeback_Inv_D, addr); - __dflush_epilogue } static inline void invalidate_dcache_line(unsigned long addr) { - __dflush_prologue cache_op(Hit_Invalidate_D, addr); - __dflush_epilogue } static inline void invalidate_scache_line(unsigned long addr) @@ -586,13 +536,9 @@ static inline void extra##blast_##pfx##cache##lsize(void) \ current_cpu_data.desc.waybit; \ unsigned long ws, addr; \ \ - __##pfx##flush_prologue \ - \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ cache##lsize##_unroll32(addr|ws, indexop); \ - \ - __##pfx##flush_epilogue \ } \ \ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ @@ -600,14 +546,10 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \ unsigned long start = page; \ unsigned long end = page + PAGE_SIZE; \ \ - __##pfx##flush_prologue \ - \ do { \ cache##lsize##_unroll32(start, hitop); \ start += lsize * 32; \ } while (start < end); \ - \ - __##pfx##flush_epilogue \ } \ \ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long page) \ @@ -620,13 +562,9 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long current_cpu_data.desc.waybit; \ unsigned long ws, addr; \ \ - __##pfx##flush_prologue \ - \ for (ws = 0; ws < ws_end; ws += ws_inc) \ for (addr = start; addr < end; addr += lsize * 32) \ cache##lsize##_unroll32(addr|ws, indexop); \ - \ - __##pfx##flush_epilogue \ } __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, ) @@ -656,14 +594,10 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \ unsigned long start = page; \ unsigned long end = page + PAGE_SIZE; \ \ - __##pfx##flush_prologue \ - \ do { \ cache##lsize##_unroll32_user(start, hitop); \ start += lsize * 32; \ } while (start < end); \ - \ - __##pfx##flush_epilogue \ } __BUILD_BLAST_USER_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, @@ -685,16 +619,12 @@ static inline void prot##extra##blast_##pfx##cache##_range(unsigned long start, unsigned long addr = start & ~(lsize - 1); \ unsigned long aend = (end - 1) & ~(lsize - 1); \ \ - __##pfx##flush_prologue \ - \ while (1) { \ prot##cache_op(hitop, addr); \ if (addr == aend) \ break; \ addr += lsize; \ } \ - \ - __##pfx##flush_epilogue \ } #ifndef CONFIG_EVA @@ -712,8 +642,6 @@ static inline void protected_blast_##pfx##cache##_range(unsigned long start,\ unsigned long addr = start & ~(lsize - 1); \ unsigned long aend = (end - 1) & ~(lsize - 1); \ \ - __##pfx##flush_prologue \ - \ if (!uaccess_kernel()) { \ while (1) { \ protected_cachee_op(hitop, addr); \ @@ -730,7 +658,6 @@ static inline void protected_blast_##pfx##cache##_range(unsigned long start,\ } \ \ } \ - __##pfx##flush_epilogue \ } __BUILD_PROT_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D) diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 53b2cb8e5966..b7123f9c0785 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -33,6 +33,9 @@ struct plat_smp_ops { int (*cpu_disable)(void); void (*cpu_die)(unsigned int cpu); #endif +#ifdef CONFIG_KEXEC + void (*kexec_nonboot_cpu)(void); +#endif }; extern void register_smp_ops(const struct plat_smp_ops *ops); diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 056a6bf13491..7990c1c70471 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -91,6 +91,22 @@ static inline void __cpu_die(unsigned int cpu) extern void play_dead(void); #endif +#ifdef CONFIG_KEXEC +static inline void kexec_nonboot_cpu(void) +{ + extern const struct plat_smp_ops *mp_ops; /* private */ + + return mp_ops->kexec_nonboot_cpu(); +} + +static inline void *kexec_nonboot_cpu_func(void) +{ + extern const struct plat_smp_ops *mp_ops; /* private */ + + return mp_ops->kexec_nonboot_cpu; +} +#endif + /* * This function will set up the necessary IPIs for Linux to communicate * with the CPUs in mask. diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index f10e1e15e1c6..210c2802cf4d 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -113,22 +113,4 @@ obj-$(CONFIG_MIPS_CPC) += mips-cpc.o obj-$(CONFIG_CPU_PM) += pm.o obj-$(CONFIG_MIPS_CPS_PM) += pm-cps.o -# -# DSP ASE supported for MIPS32 or MIPS64 Release 2 cores only. It is not -# safe to unconditionnaly use the assembler -mdsp / -mdspr2 switches -# here because the compiler may use DSP ASE instructions (such as lwx) in -# code paths where we cannot check that the CPU we are running on supports it. -# Proper abstraction using HAVE_AS_DSP and macros is done in -# arch/mips/include/asm/mipsregs.h. -# -ifeq ($(CONFIG_CPU_MIPSR2), y) -CFLAGS_DSP = -DHAVE_AS_DSP - -CFLAGS_signal.o = $(CFLAGS_DSP) -CFLAGS_signal32.o = $(CFLAGS_DSP) -CFLAGS_process.o = $(CFLAGS_DSP) -CFLAGS_branch.o = $(CFLAGS_DSP) -CFLAGS_ptrace.o = $(CFLAGS_DSP) -endif - CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index d455363d51c3..2c7288041a99 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -36,6 +36,9 @@ static void crash_shutdown_secondary(void *passed_regs) if (!cpu_online(cpu)) return; + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + local_irq_disable(); if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); @@ -43,7 +46,9 @@ static void crash_shutdown_secondary(void *passed_regs) while (!atomic_read(&kexec_ready_to_reboot)) cpu_relax(); - relocated_kexec_smp_wait(NULL); + + kexec_reboot(); + /* NOTREACHED */ } diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index d1bb506adc10..351d40fe0859 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -77,7 +77,7 @@ EXPORT(_stext) */ FEXPORT(__kernel_entry) j kernel_entry -#endif +#endif /* CONFIG_BOOT_RAW */ __REF @@ -94,24 +94,26 @@ NESTED(kernel_entry, 16, sp) # kernel entry point 0: #ifdef CONFIG_USE_OF -#ifdef CONFIG_MIPS_RAW_APPENDED_DTB +#if defined(CONFIG_MIPS_RAW_APPENDED_DTB) || \ + defined(CONFIG_MIPS_ELF_APPENDED_DTB) + PTR_LA t2, __appended_dtb #ifdef CONFIG_CPU_BIG_ENDIAN li t1, 0xd00dfeed -#else +#else /* !CONFIG_CPU_BIG_ENDIAN */ li t1, 0xedfe0dd0 -#endif +#endif /* !CONFIG_CPU_BIG_ENDIAN */ lw t0, (t2) beq t0, t1, dtb_found -#endif +#endif /* CONFIG_MIPS_RAW_APPENDED_DTB || CONFIG_MIPS_ELF_APPENDED_DTB */ li t1, -2 move t2, a1 beq a0, t1, dtb_found li t2, 0 dtb_found: -#endif +#endif /* CONFIG_USE_OF */ PTR_LA t0, __bss_start # clear .bss LONG_S zero, (t0) PTR_LA t1, __bss_stop - LONGSIZE @@ -156,9 +158,9 @@ dtb_found: * newly sync'd icache. */ jr.hb v0 -#else +#else /* !CONFIG_RELOCATABLE */ j start_kernel -#endif +#endif /* !CONFIG_RELOCATABLE */ END(kernel_entry) #ifdef CONFIG_SMP diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c index 8b574bcd39ba..93936dce04d6 100644 --- a/arch/mips/kernel/machine_kexec.c +++ b/arch/mips/kernel/machine_kexec.c @@ -9,6 +9,7 @@ #include <linux/kexec.h> #include <linux/mm.h> #include <linux/delay.h> +#include <linux/libfdt.h> #include <asm/cacheflush.h> #include <asm/page.h> @@ -19,15 +20,18 @@ extern const size_t relocate_new_kernel_size; extern unsigned long kexec_start_address; extern unsigned long kexec_indirection_page; -int (*_machine_kexec_prepare)(struct kimage *) = NULL; -void (*_machine_kexec_shutdown)(void) = NULL; -void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL; +static unsigned long reboot_code_buffer; + #ifdef CONFIG_SMP -void (*relocated_kexec_smp_wait) (void *); +static void (*relocated_kexec_smp_wait)(void *); + atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0); void (*_crash_smp_send_stop)(void) = NULL; #endif +void (*_machine_kexec_shutdown)(void) = NULL; +void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL; + static void kexec_image_info(const struct kimage *kimage) { unsigned long i; @@ -48,13 +52,59 @@ static void kexec_image_info(const struct kimage *kimage) } } +#ifdef CONFIG_UHI_BOOT + +static int uhi_machine_kexec_prepare(struct kimage *kimage) +{ + int i; + + /* + * In case DTB file is not passed to the new kernel, a flat device + * tree will be created by kexec tool. It holds modified command + * line for the new kernel. + */ + for (i = 0; i < kimage->nr_segments; i++) { + struct fdt_header fdt; + + if (kimage->segment[i].memsz <= sizeof(fdt)) + continue; + + if (copy_from_user(&fdt, kimage->segment[i].buf, sizeof(fdt))) + continue; + + if (fdt_check_header(&fdt)) + continue; + + kexec_args[0] = -2; + kexec_args[1] = (unsigned long) + phys_to_virt((unsigned long)kimage->segment[i].mem); + break; + } + + return 0; +} + +int (*_machine_kexec_prepare)(struct kimage *) = uhi_machine_kexec_prepare; + +#else + +int (*_machine_kexec_prepare)(struct kimage *) = NULL; + +#endif /* CONFIG_UHI_BOOT */ + int machine_kexec_prepare(struct kimage *kimage) { +#ifdef CONFIG_SMP + if (!kexec_nonboot_cpu_func()) + return -EINVAL; +#endif + kexec_image_info(kimage); if (_machine_kexec_prepare) return _machine_kexec_prepare(kimage); + return 0; } @@ -63,11 +113,41 @@ machine_kexec_cleanup(struct kimage *kimage) { } +#ifdef CONFIG_SMP +static void kexec_shutdown_secondary(void *param) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); + + /* NOTREACHED */ +} +#endif + void machine_shutdown(void) { if (_machine_kexec_shutdown) _machine_kexec_shutdown(); + +#ifdef CONFIG_SMP + smp_call_function(kexec_shutdown_secondary, NULL, 0); + + while (num_online_cpus() > 1) { + cpu_relax(); + mdelay(1); + } +#endif } void @@ -79,12 +159,57 @@ machine_crash_shutdown(struct pt_regs *regs) default_machine_crash_shutdown(regs); } -typedef void (*noretfun_t)(void) __noreturn; +#ifdef CONFIG_SMP +void kexec_nonboot_cpu_jump(void) +{ + local_flush_icache_range((unsigned long)relocated_kexec_smp_wait, + reboot_code_buffer + relocate_new_kernel_size); + + relocated_kexec_smp_wait(NULL); +} +#endif + +void kexec_reboot(void) +{ + void (*do_kexec)(void) __noreturn; + + /* + * We know we were online, and there will be no incoming IPIs at + * this point. Mark online again before rebooting so that the crash + * analysis tool will see us correctly. + */ + set_cpu_online(smp_processor_id(), true); + + /* Ensure remote CPUs observe that we're online before rebooting. */ + smp_mb__after_atomic(); + +#ifdef CONFIG_SMP + if (smp_processor_id() > 0) { + /* + * Instead of cpu_relax() or wait, this is needed for kexec + * smp reboot. Kdump usually doesn't require an smp new + * kernel, but kexec may do. + */ + kexec_nonboot_cpu(); + + /* NOTREACHED */ + } +#endif + + /* + * Make sure we get correct instructions written by the + * machine_kexec() CPU. + */ + local_flush_icache_range(reboot_code_buffer, + reboot_code_buffer + relocate_new_kernel_size); + + do_kexec = (void *)reboot_code_buffer; + do_kexec(); +} void machine_kexec(struct kimage *image) { - unsigned long reboot_code_buffer; unsigned long entry; unsigned long *ptr; @@ -118,6 +243,9 @@ machine_kexec(struct kimage *image) *ptr = (unsigned long) phys_to_virt(*ptr); } + /* Mark offline BEFORE disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + /* * we do not want to be bothered. */ @@ -125,6 +253,7 @@ machine_kexec(struct kimage *image) printk("Will call new kernel at %08lx\n", image->start); printk("Bye ...\n"); + /* Make reboot code buffer available to the boot CPU. */ __flush_cache_all(); #ifdef CONFIG_SMP /* All secondary cpus now may jump to kexec_wait cycle */ @@ -133,5 +262,5 @@ machine_kexec(struct kimage *image) smp_wmb(); atomic_set(&kexec_ready_to_reboot, 1); #endif - ((noretfun_t) reboot_code_buffer)(); + kexec_reboot(); } diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c index efaa2527657d..9f85b98d24ac 100644 --- a/arch/mips/kernel/mips-mt.c +++ b/arch/mips/kernel/mips-mt.c @@ -154,40 +154,6 @@ static int __init config7_set(char *str) } __setup("config7=", config7_set); -/* Experimental cache flush control parameters that should go away some day */ -int mt_protiflush; -int mt_protdflush; -int mt_n_iflushes = 1; -int mt_n_dflushes = 1; - -static int __init set_protiflush(char *s) -{ - mt_protiflush = 1; - return 1; -} -__setup("protiflush", set_protiflush); - -static int __init set_protdflush(char *s) -{ - mt_protdflush = 1; - return 1; -} -__setup("protdflush", set_protdflush); - -static int __init niflush(char *s) -{ - get_option(&s, &mt_n_iflushes); - return 1; -} -__setup("niflush=", niflush); - -static int __init ndflush(char *s) -{ - get_option(&s, &mt_n_dflushes); - return 1; -} -__setup("ndflush=", ndflush); - static unsigned int itc_base; static int __init set_itc_base(char *str) @@ -232,16 +198,6 @@ void mips_mt_set_cpuoptions(void) printk("Config7: 0x%08x\n", read_c0_config7()); } - /* Report Cache management debug options */ - if (mt_protiflush) - printk("I-cache flushes single-threaded\n"); - if (mt_protdflush) - printk("D-cache flushes single-threaded\n"); - if (mt_n_iflushes != 1) - printk("I-Cache Flushes Repeated %d times\n", mt_n_iflushes); - if (mt_n_dflushes != 1) - printk("D-Cache Flushes Repeated %d times\n", mt_n_dflushes); - if (itc_base != 0) { /* * Configure ITC mapping. This code is very @@ -283,21 +239,6 @@ void mips_mt_set_cpuoptions(void) } } -/* - * Function to protect cache flushes from concurrent execution - * depends on MP software model chosen. - */ - -void mt_cflush_lockdown(void) -{ - /* FILL IN VSMP and AP/SP VERSIONS HERE */ -} - -void mt_cflush_release(void) -{ - /* FILL IN VSMP and AP/SP VERSIONS HERE */ -} - struct class *mt_class; static int __init mt_init(void) diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index cbf4cc0b0b6c..3d80a51256de 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -146,7 +146,7 @@ int __init do_relocations(void *kbase_old, void *kbase_new, long offset) break; type = (*r >> 24) & 0xff; - loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2)); + loc_orig = kbase_old + ((*r & 0x00ffffff) << 2); loc_new = RELOCATED(loc_orig); if (reloc_handlers_rel[type] == NULL) { diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index e64b9e8bb002..01a5ff4c41ff 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -333,7 +333,7 @@ static void __init finalize_initrd(void) maybe_bswap_initrd(); - reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT); + memblock_reserve(__pa(initrd_start), size); initrd_below_start_ok = 1; pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n", @@ -370,20 +370,10 @@ static void __init bootmem_init(void) #else /* !CONFIG_SGI_IP27 */ -static unsigned long __init bootmap_bytes(unsigned long pages) -{ - unsigned long bytes = DIV_ROUND_UP(pages, 8); - - return ALIGN(bytes, sizeof(long)); -} - static void __init bootmem_init(void) { unsigned long reserved_end; - unsigned long mapstart = ~0UL; - unsigned long bootmap_size; phys_addr_t ramstart = PHYS_ADDR_MAX; - bool bootmap_valid = false; int i; /* @@ -395,6 +385,8 @@ static void __init bootmem_init(void) init_initrd(); reserved_end = (unsigned long) PFN_UP(__pa_symbol(&_end)); + memblock_reserve(PHYS_OFFSET, reserved_end << PAGE_SHIFT); + /* * max_low_pfn is not a number of pages. The number of pages * of the system is given by 'max_low_pfn - min_low_pfn'. @@ -442,9 +434,6 @@ static void __init bootmem_init(void) if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) continue; #endif - if (start >= mapstart) - continue; - mapstart = max(reserved_end, start); } if (min_low_pfn >= max_low_pfn) @@ -456,9 +445,11 @@ static void __init bootmem_init(void) /* * Reserve any memory between the start of RAM and PHYS_OFFSET */ - if (ramstart > PHYS_OFFSET) + if (ramstart > PHYS_OFFSET) { add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET, BOOT_MEM_RESERVED); + memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET); + } if (min_low_pfn > ARCH_PFN_OFFSET) { pr_info("Wasting %lu bytes for tracking %lu unused pages\n", @@ -483,52 +474,6 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } -#ifdef CONFIG_BLK_DEV_INITRD - /* - * mapstart should be after initrd_end - */ - if (initrd_end) - mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); -#endif - - /* - * check that mapstart doesn't overlap with any of - * memory regions that have been reserved through eg. DTB - */ - bootmap_size = bootmap_bytes(max_low_pfn - min_low_pfn); - - bootmap_valid = memory_region_available(PFN_PHYS(mapstart), - bootmap_size); - for (i = 0; i < boot_mem_map.nr_map && !bootmap_valid; i++) { - unsigned long mapstart_addr; - - switch (boot_mem_map.map[i].type) { - case BOOT_MEM_RESERVED: - mapstart_addr = PFN_ALIGN(boot_mem_map.map[i].addr + - boot_mem_map.map[i].size); - if (PHYS_PFN(mapstart_addr) < mapstart) - break; - - bootmap_valid = memory_region_available(mapstart_addr, - bootmap_size); - if (bootmap_valid) - mapstart = PHYS_PFN(mapstart_addr); - break; - default: - break; - } - } - - if (!bootmap_valid) - panic("No memory area to place a bootmap bitmap"); - - /* - * Initialize the boot-time allocator with low memory only. - */ - if (bootmap_size != init_bootmem_node(NODE_DATA(0), mapstart, - min_low_pfn, max_low_pfn)) - panic("Unexpected memory size required for bootmap"); - for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long start, end; @@ -577,9 +522,9 @@ static void __init bootmem_init(void) default: /* Not usable memory */ if (start > min_low_pfn && end < max_low_pfn) - reserve_bootmem(boot_mem_map.map[i].addr, - boot_mem_map.map[i].size, - BOOTMEM_DEFAULT); + memblock_reserve(boot_mem_map.map[i].addr, + boot_mem_map.map[i].size); + continue; } @@ -602,15 +547,9 @@ static void __init bootmem_init(void) size = end - start; /* Register lowmem ranges */ - free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT); memory_present(0, start, end); } - /* - * Reserve the bootmap memory. - */ - reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT); - #ifdef CONFIG_RELOCATABLE /* * The kernel reserves all memory below its _end symbol as bootmem, @@ -642,29 +581,6 @@ static void __init bootmem_init(void) #endif /* CONFIG_SGI_IP27 */ -/* - * arch_mem_init - initialize memory management subsystem - * - * o plat_mem_setup() detects the memory configuration and will record detected - * memory areas using add_memory_region. - * - * At this stage the memory configuration of the system is known to the - * kernel but generic memory management system is still entirely uninitialized. - * - * o bootmem_init() - * o sparse_init() - * o paging_init() - * o dma_contiguous_reserve() - * - * At this stage the bootmem allocator is ready to use. - * - * NOTE: historically plat_mem_setup did the entire platform initialization. - * This was rather impractical because it meant plat_mem_setup had to - * get away without any kind of memory allocator. To keep old code from - * breaking plat_setup was just renamed to plat_mem_setup and a second platform - * initialization hook for anything else was introduced. - */ - static int usermem __initdata; static int __init early_parse_mem(char *p) @@ -841,6 +757,28 @@ static void __init request_crashkernel(struct resource *res) #define BUILTIN_EXTEND_WITH_PROM \ IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND) +/* + * arch_mem_init - initialize memory management subsystem + * + * o plat_mem_setup() detects the memory configuration and will record detected + * memory areas using add_memory_region. + * + * At this stage the memory configuration of the system is known to the + * kernel but generic memory management system is still entirely uninitialized. + * + * o bootmem_init() + * o sparse_init() + * o paging_init() + * o dma_contiguous_reserve() + * + * At this stage the bootmem allocator is ready to use. + * + * NOTE: historically plat_mem_setup did the entire platform initialization. + * This was rather impractical because it meant plat_mem_setup had to + * get away without any kind of memory allocator. To keep old code from + * breaking plat_setup was just renamed to plat_mem_setup and a second platform + * initialization hook for anything else was introduced. + */ static void __init arch_mem_init(char **cmdline_p) { struct memblock_region *reg; @@ -916,21 +854,29 @@ static void __init arch_mem_init(char **cmdline_p) early_init_fdt_scan_reserved_mem(); bootmem_init(); + + /* + * Prevent memblock from allocating high memory. + * This cannot be done before max_low_pfn is detected, so up + * to this point is possible to only reserve physical memory + * with memblock_reserve; memblock_virt_alloc* can be used + * only after this point + */ + memblock_set_current_limit(PFN_PHYS(max_low_pfn)); + #ifdef CONFIG_PROC_VMCORE if (setup_elfcorehdr && setup_elfcorehdr_size) { printk(KERN_INFO "kdump reserved memory at %lx-%lx\n", setup_elfcorehdr, setup_elfcorehdr_size); - reserve_bootmem(setup_elfcorehdr, setup_elfcorehdr_size, - BOOTMEM_DEFAULT); + memblock_reserve(setup_elfcorehdr, setup_elfcorehdr_size); } #endif mips_parse_crashkernel(); #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1, - BOOTMEM_DEFAULT); + memblock_reserve(crashk_res.start, + crashk_res.end - crashk_res.start + 1); #endif device_tree_init(); sparse_init(); @@ -940,7 +886,7 @@ static void __init arch_mem_init(char **cmdline_p) /* Tell bootmem about cma reserved memblock section */ for_each_memblock(reserved, reg) if (reg->size != 0) - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); + memblock_reserve(reg->base, reg->size); reserve_bootmem_region(__pa_symbol(&__nosave_begin), __pa_symbol(&__nosave_end)); /* Reserve for hibernation */ diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 159e83add4bb..76fae9b79f13 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -25,6 +25,7 @@ #include <linux/linkage.h> #include <linux/bug.h> #include <linux/kernel.h> +#include <linux/kexec.h> #include <asm/time.h> #include <asm/pgtable.h> @@ -423,6 +424,9 @@ const struct plat_smp_ops bmips43xx_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif +#ifdef CONFIG_KEXEC + .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, +#endif }; const struct plat_smp_ops bmips5000_smp_ops = { @@ -437,6 +441,9 @@ const struct plat_smp_ops bmips5000_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif +#ifdef CONFIG_KEXEC + .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, +#endif }; #endif /* CONFIG_SMP */ diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 03f1026ad148..faccfa4b280b 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -398,6 +398,55 @@ static void cps_smp_finish(void) local_irq_enable(); } +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) + +enum cpu_death { + CPU_DEATH_HALT, + CPU_DEATH_POWER, +}; + +static void cps_shutdown_this_cpu(enum cpu_death death) +{ + unsigned int cpu, core, vpe_id; + + cpu = smp_processor_id(); + core = cpu_core(&cpu_data[cpu]); + + if (death == CPU_DEATH_HALT) { + vpe_id = cpu_vpe_id(&cpu_data[cpu]); + + pr_debug("Halting core %d VP%d\n", core, vpe_id); + if (cpu_has_mipsmt) { + /* Halt this TC */ + write_c0_tchalt(TCHALT_H); + instruction_hazard(); + } else if (cpu_has_vp) { + write_cpc_cl_vp_stop(1 << vpe_id); + + /* Ensure that the VP_STOP register is written */ + wmb(); + } + } else { + pr_debug("Gating power to core %d\n", core); + /* Power down the core */ + cps_pm_enter_state(CPS_PM_POWER_GATED); + } +} + +#ifdef CONFIG_KEXEC + +static void cps_kexec_nonboot_cpu(void) +{ + if (cpu_has_mipsmt || cpu_has_vp) + cps_shutdown_this_cpu(CPU_DEATH_HALT); + else + cps_shutdown_this_cpu(CPU_DEATH_POWER); +} + +#endif /* CONFIG_KEXEC */ + +#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ + #ifdef CONFIG_HOTPLUG_CPU static int cps_cpu_disable(void) @@ -421,19 +470,15 @@ static int cps_cpu_disable(void) } static unsigned cpu_death_sibling; -static enum { - CPU_DEATH_HALT, - CPU_DEATH_POWER, -} cpu_death; +static enum cpu_death cpu_death; void play_dead(void) { - unsigned int cpu, core, vpe_id; + unsigned int cpu; local_irq_disable(); idle_task_exit(); cpu = smp_processor_id(); - core = cpu_core(&cpu_data[cpu]); cpu_death = CPU_DEATH_POWER; pr_debug("CPU%d going offline\n", cpu); @@ -456,25 +501,7 @@ void play_dead(void) /* This CPU has chosen its way out */ (void)cpu_report_death(); - if (cpu_death == CPU_DEATH_HALT) { - vpe_id = cpu_vpe_id(&cpu_data[cpu]); - - pr_debug("Halting core %d VP%d\n", core, vpe_id); - if (cpu_has_mipsmt) { - /* Halt this TC */ - write_c0_tchalt(TCHALT_H); - instruction_hazard(); - } else if (cpu_has_vp) { - write_cpc_cl_vp_stop(1 << vpe_id); - - /* Ensure that the VP_STOP register is written */ - wmb(); - } - } else { - pr_debug("Gating power to core %d\n", core); - /* Power down the core */ - cps_pm_enter_state(CPS_PM_POWER_GATED); - } + cps_shutdown_this_cpu(cpu_death); /* This should never be reached */ panic("Failed to offline CPU %u", cpu); @@ -593,6 +620,9 @@ static const struct plat_smp_ops cps_smp_ops = { .cpu_disable = cps_cpu_disable, .cpu_die = cps_cpu_die, #endif +#ifdef CONFIG_KEXEC + .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, +#endif }; bool mips_cps_smp_in_use(void) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 9dab0ed1b227..5feef28deac8 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/kallsyms.h> #include <linux/bootmem.h> +#include <linux/memblock.h> #include <linux/interrupt.h> #include <linux/ptrace.h> #include <linux/kgdb.h> @@ -348,7 +349,7 @@ static void __show_regs(const struct pt_regs *regs) */ void show_regs(struct pt_regs *regs) { - __show_regs((struct pt_regs *)regs); + __show_regs(regs); dump_stack(); } @@ -2260,8 +2261,10 @@ void __init trap_init(void) unsigned long size = 0x200 + VECTORSPACING*64; phys_addr_t ebase_pa; + memblock_set_bottom_up(true); ebase = (unsigned long) __alloc_bootmem(size, 1 << fls(size), 0); + memblock_set_bottom_up(false); /* * Try to ensure ebase resides in KSeg0 if possible. diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 2d0b912f9e3e..ce446eed62d2 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -130,7 +130,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -151,8 +151,8 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else -/* MIPSR6 has no lwl instruction */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +/* For CPUs without lwl instruction */ #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -186,7 +186,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #define _LoadHWU(addr, value, res, type) \ do { \ @@ -212,7 +212,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -255,8 +255,8 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else -/* MIPSR6 has not lwl and ldl instructions */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +/* For CPUs without lwl and ldl instructions */ #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -339,7 +339,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #define _StoreHW(addr, value, res, type) \ @@ -365,7 +365,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT));\ } while(0) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -406,8 +406,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ } while(0) -#else -/* MIPSR6 has no swl and sdl instructions */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -483,7 +482,7 @@ do { \ : "memory"); \ } while(0) -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #else /* __BIG_ENDIAN */ @@ -509,7 +508,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -530,8 +529,8 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else -/* MIPSR6 has no lwl instruction */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +/* For CPUs without lwl instruction */ #define _LoadW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -565,7 +564,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #define _LoadHWU(addr, value, res, type) \ @@ -592,7 +591,7 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -635,8 +634,8 @@ do { \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#else -/* MIPSR6 has not lwl and ldl instructions */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +/* For CPUs without lwl and ldl instructions */ #define _LoadWU(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -718,7 +717,7 @@ do { \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ } while(0) -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #define _StoreHW(addr, value, res, type) \ do { \ @@ -743,7 +742,7 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT));\ } while(0) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -784,8 +783,8 @@ do { \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ } while(0) -#else -/* MIPSR6 has no swl and sdl instructions */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ +/* For CPUs without swl and sdl instructions */ #define _StoreW(addr, value, res, type) \ do { \ __asm__ __volatile__ ( \ @@ -861,7 +860,7 @@ do { \ : "memory"); \ } while(0) -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #endif #define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 6537e022ef62..479f50559c83 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -7,7 +7,7 @@ lib-y += bitops.o csum_partial.o delay.o memcpy.o memset.o \ mips-atomic.o strncpy_user.o \ strnlen_user.o uncached.o -obj-y += iomap.o iomap_copy.o +obj-y += iomap_copy.o obj-$(CONFIG_PCI) += iomap-pci.o lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y)) diff --git a/arch/mips/lib/iomap-pci.c b/arch/mips/lib/iomap-pci.c index 4850509c5534..210f5a95ecb1 100644 --- a/arch/mips/lib/iomap-pci.c +++ b/arch/mips/lib/iomap-pci.c @@ -44,10 +44,3 @@ void __iomem *__pci_ioport_map(struct pci_dev *dev, } #endif /* CONFIG_PCI_DRIVERS_LEGACY */ - -void pci_iounmap(struct pci_dev *dev, void __iomem * addr) -{ - iounmap(addr); -} - -EXPORT_SYMBOL(pci_iounmap); diff --git a/arch/mips/lib/iomap.c b/arch/mips/lib/iomap.c deleted file mode 100644 index 9b31653f318c..000000000000 --- a/arch/mips/lib/iomap.c +++ /dev/null @@ -1,227 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Implement the default iomap interfaces - * - * (C) Copyright 2004 Linus Torvalds - * (C) Copyright 2006 Ralf Baechle <ralf@linux-mips.org> - * (C) Copyright 2007 MIPS Technologies, Inc. - * written by Ralf Baechle <ralf@linux-mips.org> - */ -#include <linux/export.h> -#include <asm/io.h> - -/* - * Read/write from/to an (offsettable) iomem cookie. It might be a PIO - * access or a MMIO access, these functions don't care. The info is - * encoded in the hardware mapping set up by the mapping functions - * (or the cookie itself, depending on implementation and hw). - * - * The generic routines don't assume any hardware mappings, and just - * encode the PIO/MMIO as part of the cookie. They coldly assume that - * the MMIO IO mappings are not in the low address range. - * - * Architectures for which this is not true can't use this generic - * implementation and should do their own copy. - */ - -#define PIO_MASK 0x0ffffUL - -unsigned int ioread8(void __iomem *addr) -{ - return readb(addr); -} - -EXPORT_SYMBOL(ioread8); - -unsigned int ioread16(void __iomem *addr) -{ - return readw(addr); -} - -EXPORT_SYMBOL(ioread16); - -unsigned int ioread16be(void __iomem *addr) -{ - return be16_to_cpu(__raw_readw(addr)); -} - -EXPORT_SYMBOL(ioread16be); - -unsigned int ioread32(void __iomem *addr) -{ - return readl(addr); -} - -EXPORT_SYMBOL(ioread32); - -unsigned int ioread32be(void __iomem *addr) -{ - return be32_to_cpu(__raw_readl(addr)); -} - -EXPORT_SYMBOL(ioread32be); - -void iowrite8(u8 val, void __iomem *addr) -{ - writeb(val, addr); -} - -EXPORT_SYMBOL(iowrite8); - -void iowrite16(u16 val, void __iomem *addr) -{ - writew(val, addr); -} - -EXPORT_SYMBOL(iowrite16); - -void iowrite16be(u16 val, void __iomem *addr) -{ - __raw_writew(cpu_to_be16(val), addr); -} - -EXPORT_SYMBOL(iowrite16be); - -void iowrite32(u32 val, void __iomem *addr) -{ - writel(val, addr); -} - -EXPORT_SYMBOL(iowrite32); - -void iowrite32be(u32 val, void __iomem *addr) -{ - __raw_writel(cpu_to_be32(val), addr); -} - -EXPORT_SYMBOL(iowrite32be); - -/* - * These are the "repeat MMIO read/write" functions. - * Note the "__mem" accesses, since we want to convert - * to CPU byte order if the host bus happens to not match the - * endianness of PCI/ISA (see mach-generic/mangle-port.h). - */ -static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) -{ - while (--count >= 0) { - u8 data = __mem_readb(addr); - *dst = data; - dst++; - } -} - -static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) -{ - while (--count >= 0) { - u16 data = __mem_readw(addr); - *dst = data; - dst++; - } -} - -static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) -{ - while (--count >= 0) { - u32 data = __mem_readl(addr); - *dst = data; - dst++; - } -} - -static inline void mmio_outsb(void __iomem *addr, const u8 *src, int count) -{ - while (--count >= 0) { - __mem_writeb(*src, addr); - src++; - } -} - -static inline void mmio_outsw(void __iomem *addr, const u16 *src, int count) -{ - while (--count >= 0) { - __mem_writew(*src, addr); - src++; - } -} - -static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) -{ - while (--count >= 0) { - __mem_writel(*src, addr); - src++; - } -} - -void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) -{ - mmio_insb(addr, dst, count); -} - -EXPORT_SYMBOL(ioread8_rep); - -void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) -{ - mmio_insw(addr, dst, count); -} - -EXPORT_SYMBOL(ioread16_rep); - -void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) -{ - mmio_insl(addr, dst, count); -} - -EXPORT_SYMBOL(ioread32_rep); - -void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) -{ - mmio_outsb(addr, src, count); -} - -EXPORT_SYMBOL(iowrite8_rep); - -void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) -{ - mmio_outsw(addr, src, count); -} - -EXPORT_SYMBOL(iowrite16_rep); - -void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) -{ - mmio_outsl(addr, src, count); -} - -EXPORT_SYMBOL(iowrite32_rep); - -/* - * Create a virtual mapping cookie for an IO port range - * - * This uses the same mapping are as the in/out family which has to be setup - * by the platform initialization code. - * - * Just to make matters somewhat more interesting on MIPS systems with - * multiple host bridge each will have it's own ioport address space. - */ -static void __iomem *ioport_map_legacy(unsigned long port, unsigned int nr) -{ - return (void __iomem *) (mips_io_port_base + port); -} - -void __iomem *ioport_map(unsigned long port, unsigned int nr) -{ - if (port > PIO_MASK) - return NULL; - - return ioport_map_legacy(port, nr); -} - -EXPORT_SYMBOL(ioport_map); - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} - -EXPORT_SYMBOL(ioport_unmap); diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 03e3304d6ae5..cdd19d8561e8 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -204,9 +204,10 @@ #define LOADB(reg, addr, handler) EXC(lb, LD_INSN, reg, addr, handler) #define STOREB(reg, addr, handler) EXC(sb, ST_INSN, reg, addr, handler) -#define _PREF(hint, addr, type) \ +#ifdef CONFIG_CPU_HAS_PREFETCH +# define _PREF(hint, addr, type) \ .if \mode == LEGACY_MODE; \ - PREF(hint, addr); \ + kernel_pref(hint, addr); \ .else; \ .if ((\from == USEROP) && (type == SRC_PREFETCH)) || \ ((\to == USEROP) && (type == DST_PREFETCH)); \ @@ -218,12 +219,15 @@ * used later on. Therefore use $v1. \ */ \ .set at=v1; \ - PREFE(hint, addr); \ + user_pref(hint, addr); \ .set noat; \ .else; \ - PREF(hint, addr); \ + kernel_pref(hint, addr); \ .endif; \ .endif +#else +# define _PREF(hint, addr, type) +#endif #define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH) #define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH) @@ -297,7 +301,7 @@ and t0, src, ADDRMASK PREFS( 0, 2*32(src) ) PREFD( 1, 2*32(dst) ) -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR bnez t1, .Ldst_unaligned\@ nop bnez t0, .Lsrc_unaligned_dst_aligned\@ @@ -385,7 +389,7 @@ bne rem, len, 1b .set noreorder -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR /* * src and dst are aligned, need to copy rem bytes (rem < NBYTES) * A loop would do only a byte at a time with possible branch @@ -487,7 +491,7 @@ bne len, rem, 1b .set noreorder -#endif /* !CONFIG_CPU_MIPSR6 */ +#endif /* CONFIG_CPU_HAS_LOAD_STORE_LR */ .Lcopy_bytes_checklen\@: beqz len, .Ldone\@ nop @@ -516,7 +520,7 @@ jr ra nop -#ifdef CONFIG_CPU_MIPSR6 +#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR .Lcopy_unaligned_bytes\@: 1: COPY_BYTE(0) @@ -530,7 +534,7 @@ ADD src, src, 8 b 1b ADD dst, dst, 8 -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ .if __memcpy == 1 END(memcpy) .set __memcpy, 0 diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 069acec3df9f..418611ef13cf 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -78,7 +78,6 @@ #endif .endm - .set noreorder .align 5 /* @@ -94,13 +93,16 @@ .endif sltiu t0, a2, STORSIZE /* very small region? */ + .set noreorder bnez t0, .Lsmall_memset\@ andi t0, a0, STORMASK /* aligned? */ + .set reorder #ifdef CONFIG_CPU_MICROMIPS move t8, a1 /* used by 'swp' instruction */ move t9, a1 #endif + .set noreorder #ifndef CONFIG_CPU_DADDI_WORKAROUNDS beqz t0, 1f PTR_SUBU t0, STORSIZE /* alignment in bytes */ @@ -111,8 +113,9 @@ PTR_SUBU t0, AT /* alignment in bytes */ .set at #endif + .set reorder -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR R10KCBARRIER(0(ra)) #ifdef __MIPSEB__ EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */ @@ -122,11 +125,13 @@ PTR_SUBU a0, t0 /* long align ptr */ PTR_ADDU a2, t0 /* correct size */ -#else /* CONFIG_CPU_MIPSR6 */ +#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ #define STORE_BYTE(N) \ EX(sb, a1, N(a0), .Lbyte_fixup\@); \ + .set noreorder; \ beqz t0, 0f; \ - PTR_ADDU t0, 1; + PTR_ADDU t0, 1; \ + .set reorder; PTR_ADDU a2, t0 /* correct size */ PTR_ADDU t0, 1 @@ -145,19 +150,17 @@ ori a0, STORMASK xori a0, STORMASK PTR_ADDIU a0, STORSIZE -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ 1: ori t1, a2, 0x3f /* # of full blocks */ xori t1, 0x3f + andi t0, a2, 0x40-STORSIZE beqz t1, .Lmemset_partial\@ /* no block to fill */ - andi t0, a2, 0x40-STORSIZE PTR_ADDU t1, a0 /* end address */ - .set reorder 1: PTR_ADDIU a0, 64 R10KCBARRIER(0(ra)) f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode bne t1, a0, 1b - .set noreorder .Lmemset_partial\@: R10KCBARRIER(0(ra)) @@ -173,20 +176,18 @@ PTR_SUBU t1, AT .set at #endif + PTR_ADDU a0, t0 /* dest ptr */ jr t1 - PTR_ADDU a0, t0 /* dest ptr */ - .set push - .set noreorder - .set nomacro /* ... but first do longs ... */ f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode -2: .set pop - andi a2, STORMASK /* At most one long to go */ +2: andi a2, STORMASK /* At most one long to go */ + .set noreorder beqz a2, 1f -#ifndef CONFIG_CPU_MIPSR6 +#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR PTR_ADDU a0, a2 /* What's left */ + .set reorder R10KCBARRIER(0(ra)) #ifdef __MIPSEB__ EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@) @@ -195,6 +196,7 @@ #endif #else PTR_SUBU t0, $0, a2 + .set reorder move a2, zero /* No remaining longs */ PTR_ADDIU t0, 1 STORE_BYTE(0) @@ -210,41 +212,42 @@ #endif 0: #endif -1: jr ra - move a2, zero +1: move a2, zero + jr ra .Lsmall_memset\@: + PTR_ADDU t1, a0, a2 beqz a2, 2f - PTR_ADDU t1, a0, a2 1: PTR_ADDIU a0, 1 /* fill bytewise */ R10KCBARRIER(0(ra)) + .set noreorder bne t1, a0, 1b EX(sb, a1, -1(a0), .Lsmall_fixup\@) + .set reorder -2: jr ra /* done */ - move a2, zero +2: move a2, zero + jr ra /* done */ .if __memset == 1 END(memset) .set __memset, 0 .hidden __memset .endif -#ifdef CONFIG_CPU_MIPSR6 +#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR .Lbyte_fixup\@: /* * unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1 * a2 = a2 - t0 + 1 */ PTR_SUBU a2, t0 + PTR_ADDIU a2, 1 jr ra - PTR_ADDIU a2, 1 -#endif /* CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */ .Lfirst_fixup\@: /* unset_bytes already in a2 */ jr ra - nop .Lfwd_fixup\@: /* @@ -255,8 +258,8 @@ andi a2, 0x3f LONG_L t0, THREAD_BUADDR(t0) LONG_ADDU a2, t1 + LONG_SUBU a2, t0 jr ra - LONG_SUBU a2, t0 .Lpartial_fixup\@: /* @@ -267,24 +270,21 @@ andi a2, STORMASK LONG_L t0, THREAD_BUADDR(t0) LONG_ADDU a2, a0 + LONG_SUBU a2, t0 jr ra - LONG_SUBU a2, t0 .Llast_fixup\@: /* unset_bytes already in a2 */ jr ra - nop .Lsmall_fixup\@: /* * unset_bytes = end_addr - current_addr + 1 * a2 = t1 - a0 + 1 */ - .set reorder PTR_SUBU a2, t1, a0 PTR_ADDIU a2, 1 jr ra - .set noreorder .endm @@ -298,8 +298,8 @@ LEAF(memset) EXPORT_SYMBOL(memset) + move v0, a0 /* result */ beqz a1, 1f - move v0, a0 /* result */ andi a1, 0xff /* spread fillword */ LONG_SLL t1, a1, 8 diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson64/common/Makefile index 57ee03022941..684624f61f5a 100644 --- a/arch/mips/loongson64/common/Makefile +++ b/arch/mips/loongson64/common/Makefile @@ -6,7 +6,6 @@ obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \ bonito-irq.o mem.o machtype.o platform.o serial.o obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_CPU_LOONGSON2) += dma.o # # Serial port support diff --git a/arch/mips/loongson64/fuloong-2e/Makefile b/arch/mips/loongson64/fuloong-2e/Makefile index b7622720c1ad..0a9a472bec0a 100644 --- a/arch/mips/loongson64/fuloong-2e/Makefile +++ b/arch/mips/loongson64/fuloong-2e/Makefile @@ -2,4 +2,4 @@ # Makefile for Lemote Fuloong2e mini-PC board. # -obj-y += irq.o reset.o +obj-y += irq.o reset.o dma.o diff --git a/arch/mips/loongson64/fuloong-2e/dma.c b/arch/mips/loongson64/fuloong-2e/dma.c new file mode 100644 index 000000000000..e122292bf666 --- /dev/null +++ b/arch/mips/loongson64/fuloong-2e/dma.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/dma-direct.h> + +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr | 0x80000000; +} + +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr & 0x7fffffff; +} diff --git a/arch/mips/loongson64/lemote-2f/Makefile b/arch/mips/loongson64/lemote-2f/Makefile index 08b8abcbfef5..b5792c334cd5 100644 --- a/arch/mips/loongson64/lemote-2f/Makefile +++ b/arch/mips/loongson64/lemote-2f/Makefile @@ -2,7 +2,7 @@ # Makefile for lemote loongson2f family machines # -obj-y += clock.o machtype.o irq.o reset.o ec_kb3310b.o +obj-y += clock.o machtype.o irq.o reset.o dma.o ec_kb3310b.o # # Suspend Support diff --git a/arch/mips/loongson64/common/dma.c b/arch/mips/loongson64/lemote-2f/dma.c index 48f04126bde2..abf0e39d7e46 100644 --- a/arch/mips/loongson64/common/dma.c +++ b/arch/mips/loongson64/lemote-2f/dma.c @@ -8,11 +8,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) { -#if defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT) if (dma_addr > 0x8fffffff) return dma_addr; return dma_addr & 0x0fffffff; -#else - return dma_addr & 0x7fffffff; -#endif } diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c index cbeb20f9fc95..5605061f5f98 100644 --- a/arch/mips/loongson64/loongson-3/irq.c +++ b/arch/mips/loongson64/loongson-3/irq.c @@ -96,51 +96,8 @@ void mach_irq_dispatch(unsigned int pending) } } -static struct irqaction cascade_irqaction = { - .handler = no_action, - .flags = IRQF_NO_SUSPEND, - .name = "cascade", -}; - -static inline void mask_loongson_irq(struct irq_data *d) -{ - clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE)); - irq_disable_hazard(); - - /* Workaround: UART IRQ may deliver to any core */ - if (d->irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenclr_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENCLR); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenclr_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<<core_id); - } -} - -static inline void unmask_loongson_irq(struct irq_data *d) -{ - /* Workaround: UART IRQ may deliver to any core */ - if (d->irq == LOONGSON_UART_IRQ) { - int cpu = smp_processor_id(); - int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node; - int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node; - u64 intenset_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_INTENSET); - u64 introuter_lpc_addr = smp_group[node_id] | - (u64)(&LOONGSON_INT_ROUTER_LPC); - - *(volatile u32 *)intenset_addr = 1 << 10; - *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<<core_id); - } - - set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE)); - irq_enable_hazard(); -} +static inline void mask_loongson_irq(struct irq_data *d) { } +static inline void unmask_loongson_irq(struct irq_data *d) { } /* For MIPS IRQs which shared by all cores */ static struct irq_chip loongson_irq_chip = { @@ -183,12 +140,11 @@ void __init mach_init_irq(void) chip->irq_set_affinity = plat_set_irq_affinity; irq_set_chip_and_handler(LOONGSON_UART_IRQ, - &loongson_irq_chip, handle_level_irq); - - /* setup HT1 irq */ - setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction); + &loongson_irq_chip, handle_percpu_irq); + irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ, + &loongson_irq_chip, handle_percpu_irq); - set_c0_status(STATUSF_IP2 | STATUSF_IP6); + set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 9717106de4a5..c1e6ec52c614 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -180,43 +180,39 @@ static void __init szmem(unsigned int node) static void __init node_mem_init(unsigned int node) { - unsigned long bootmap_size; unsigned long node_addrspace_offset; - unsigned long start_pfn, end_pfn, freepfn; + unsigned long start_pfn, end_pfn; node_addrspace_offset = nid_to_addroffset(node); pr_info("Node%d's addrspace_offset is 0x%lx\n", node, node_addrspace_offset); get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - freepfn = start_pfn; - if (node == 0) - freepfn = PFN_UP(__pa_symbol(&_end)); /* kernel end address */ - pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx, freepfn=0x%lx\n", - node, start_pfn, end_pfn, freepfn); + pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", + node, start_pfn, end_pfn); __node_data[node] = prealloc__node_data + node; - NODE_DATA(node)->bdata = &bootmem_node_data[node]; NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; - bootmap_size = init_bootmem_node(NODE_DATA(node), freepfn, - start_pfn, end_pfn); free_bootmem_with_active_regions(node, end_pfn); - if (node == 0) /* used by finalize_initrd() */ + + if (node == 0) { + /* kernel end address */ + unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); + + /* used by finalize_initrd() */ max_low_pfn = end_pfn; - /* This is reserved for the kernel and bdata->node_bootmem_map */ - reserve_bootmem_node(NODE_DATA(node), start_pfn << PAGE_SHIFT, - ((freepfn - start_pfn) << PAGE_SHIFT) + bootmap_size, - BOOTMEM_DEFAULT); + /* Reserve the kernel text/data/bss */ + memblock_reserve(start_pfn << PAGE_SHIFT, + ((kernel_end_pfn - start_pfn) << PAGE_SHIFT)); - if (node == 0 && node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) { /* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */ - reserve_bootmem_node(NODE_DATA(node), - (node_addrspace_offset | 0xfe000000), - 32 << 20, BOOTMEM_DEFAULT); + if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) + memblock_reserve((node_addrspace_offset | 0xfe000000), + 32 << 20); } sparse_memory_present_with_active_regions(node); diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index fea95d003269..b5c1e0aa955e 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -21,6 +21,7 @@ #include <linux/sched/task_stack.h> #include <linux/smp.h> #include <linux/cpufreq.h> +#include <linux/kexec.h> #include <asm/processor.h> #include <asm/time.h> #include <asm/clock.h> @@ -349,7 +350,7 @@ static void loongson3_smp_finish(void) write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); local_irq_enable(); loongson3_ipi_write64(0, - (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0)); + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); pr_info("CPU#%d finished, CP0_ST=%x\n", smp_processor_id(), read_c0_status()); } @@ -416,13 +417,13 @@ static int loongson3_boot_secondary(int cpu, struct task_struct *idle) cpu, startargs[0], startargs[1], startargs[2]); loongson3_ipi_write64(startargs[3], - (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x18)); + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x18); loongson3_ipi_write64(startargs[2], - (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x10)); + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x10); loongson3_ipi_write64(startargs[1], - (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x8)); + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x8); loongson3_ipi_write64(startargs[0], - (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0)); + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); return 0; } @@ -749,4 +750,7 @@ const struct plat_smp_ops loongson3_smp_ops = { .cpu_disable = loongson3_cpu_disable, .cpu_die = loongson3_cpu_die, #endif +#ifdef CONFIG_KEXEC + .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, +#endif }; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 400676ce03f4..15cae0f11880 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -32,7 +32,6 @@ #include <linux/kcore.h> #include <linux/initrd.h> -#include <asm/asm-offsets.h> #include <asm/bootinfo.h> #include <asm/cachectl.h> #include <asm/cpu.h> @@ -521,17 +520,13 @@ unsigned long pgd_current[NR_CPUS]; #endif /* - * gcc 3.3 and older have trouble determining that PTRS_PER_PGD and PGD_ORDER - * are constants. So we use the variants from asm-offset.h until that gcc - * will officially be retired. - * * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used * __aligned(64K), its size would get rounded up to the alignment * size, and waste space. So we place it in its own section and align * it in the linker script. */ -pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir); +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); #ifndef __PAGETABLE_PUD_FOLDED pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; #endif diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c index f4961bc9a61d..cf33dd8a487e 100644 --- a/arch/mips/netlogic/common/irq.c +++ b/arch/mips/netlogic/common/irq.c @@ -291,7 +291,7 @@ static int __init xlp_of_pic_init(struct device_node *node, /* we need a hack to get the PIC's SoC chip id */ ret = of_address_to_resource(node, 0, &res); if (ret < 0) { - pr_err("PIC %s: reg property not found!\n", node->name); + pr_err("PIC %pOFn: reg property not found!\n", node); return -EINVAL; } @@ -304,21 +304,21 @@ static int __init xlp_of_pic_init(struct device_node *node, break; } if (socid == NLM_NR_NODES) { - pr_err("PIC %s: Node mapping for bus %d not found!\n", - node->name, bus); + pr_err("PIC %pOFn: Node mapping for bus %d not found!\n", + node, bus); return -EINVAL; } } else { socid = (res.start >> 18) & 0x3; if (!nlm_node_present(socid)) { - pr_err("PIC %s: node %d does not exist!\n", - node->name, socid); + pr_err("PIC %pOFn: node %d does not exist!\n", + node, socid); return -EINVAL; } } if (!nlm_node_present(socid)) { - pr_err("PIC %s: node %d does not exist!\n", node->name, socid); + pr_err("PIC %pOFn: node %d does not exist!\n", node, socid); return -EINVAL; } @@ -326,7 +326,7 @@ static int __init xlp_of_pic_init(struct device_node *node, nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE, &xlp_pic_irq_domain_ops, NULL); if (xlp_pic_domain == NULL) { - pr_err("PIC %s: Creating legacy domain failed!\n", node->name); + pr_err("PIC %pOFn: Creating legacy domain failed!\n", node); return -EINVAL; } pr_info("Node %d: IRQ domain created for PIC@%pR\n", socid, &res); diff --git a/arch/mips/pci/ops-loongson3.c b/arch/mips/pci/ops-loongson3.c index 9e118431e226..2f6ad36bdea6 100644 --- a/arch/mips/pci/ops-loongson3.c +++ b/arch/mips/pci/ops-loongson3.c @@ -18,22 +18,36 @@ static int loongson3_pci_config_access(unsigned char access_type, int where, u32 *data) { unsigned char busnum = bus->number; - u_int64_t addr, type; - void *addrp; - int device = PCI_SLOT(devfn); int function = PCI_FUNC(devfn); + int device = PCI_SLOT(devfn); int reg = where & ~3; + void *addrp; + u64 addr; + + if (where < PCI_CFG_SPACE_SIZE) { /* standard config */ + addr = (busnum << 16) | (device << 11) | (function << 8) | reg; + if (busnum == 0) { + if (device > 31) + return PCIBIOS_DEVICE_NOT_FOUND; + addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE | addr); + } else { + addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE_TP1 | addr); + } + } else if (where < PCI_CFG_SPACE_EXP_SIZE) { /* extended config */ + struct pci_dev *rootdev; + + rootdev = pci_get_domain_bus_and_slot(0, 0, 0); + if (!rootdev) + return PCIBIOS_DEVICE_NOT_FOUND; - addr = (busnum << 16) | (device << 11) | (function << 8) | reg; - if (busnum == 0) { - if (device > 31) + addr = pci_resource_start(rootdev, 3); + if (!addr) return PCIBIOS_DEVICE_NOT_FOUND; - addrp = (void *)(TO_UNCAC(HT1LO_PCICFG_BASE) | (addr & 0xffff)); - type = 0; + addr |= busnum << 20 | device << 15 | function << 12 | reg; + addrp = (void *)TO_UNCAC(addr); } else { - addrp = (void *)(TO_UNCAC(HT1LO_PCICFG_BASE_TP1) | (addr)); - type = 0x10000; + return PCIBIOS_DEVICE_NOT_FOUND; } if (access_type == PCI_ACCESS_WRITE) diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index f1e92bf743c2..3c3b1e6abb53 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -127,8 +127,12 @@ static void pcibios_scanbus(struct pci_controller *hose) if (pci_has_flag(PCI_PROBE_ONLY)) { pci_bus_claim_resources(bus); } else { + struct pci_bus *child; + pci_bus_size_bridges(bus); pci_bus_assign_resources(bus); + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } pci_bus_add_devices(bus); } diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index 711cdccdf65b..f376a1df326a 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -246,6 +246,8 @@ static int rt288x_pci_probe(struct platform_device *pdev) rt2880_pci_write_u32(PCI_BASE_ADDRESS_0, 0x08000000); (void) rt2880_pci_read_u32(PCI_BASE_ADDRESS_0); + rt2880_pci_controller.of_node = pdev->dev.of_node; + register_pci_controller(&rt2880_pci_controller); return 0; } diff --git a/arch/mips/pmcs-msp71xx/msp_usb.c b/arch/mips/pmcs-msp71xx/msp_usb.c index c87c5f810cd1..d38ac70b5a2e 100644 --- a/arch/mips/pmcs-msp71xx/msp_usb.c +++ b/arch/mips/pmcs-msp71xx/msp_usb.c @@ -133,13 +133,13 @@ static int __init msp_usb_setup(void) * "D" for device-mode. If it works for Ethernet, why not USB... * -- hammtrev, 2007/03/22 */ - snprintf((char *)&envstr[0], sizeof(envstr), "usbmode"); + snprintf(&envstr[0], sizeof(envstr), "usbmode"); /* set default host mode */ val = 1; /* get environment string */ - strp = prom_getenv((char *)&envstr[0]); + strp = prom_getenv(&envstr[0]); if (strp) { /* compare string */ if (!strcmp(strp, "device")) diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index 92f284d2b802..61a08943eb2f 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -134,7 +134,7 @@ static int __init ralink_systick_init(struct device_node *np) systick.dev.min_delta_ticks = 0x3; systick.dev.irq = irq_of_parse_and_map(np, 0); if (!systick.dev.irq) { - pr_err("%s: request_irq failed", np->name); + pr_err("%pOFn: request_irq failed", np); return -EINVAL; } @@ -146,8 +146,8 @@ static int __init ralink_systick_init(struct device_node *np) clockevents_register_device(&systick.dev); - pr_info("%s: running - mult: %d, shift: %d\n", - np->name, systick.dev.mult, systick.dev.shift); + pr_info("%pOFn: running - mult: %d, shift: %d\n", + np, systick.dev.mult, systick.dev.shift); return 0; } diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 765d5ba98fa2..fc056f2acfeb 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -62,7 +62,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { - pr_err("%s: failed to lookup pdev\n", np->name); + pr_err("%pOFn: failed to lookup pdev\n", np); return -EINVAL; } diff --git a/arch/mips/ralink/rt305x.c b/arch/mips/ralink/rt305x.c index 93d472c60ce4..0f2264e0cf76 100644 --- a/arch/mips/ralink/rt305x.c +++ b/arch/mips/ralink/rt305x.c @@ -49,6 +49,10 @@ static struct rt2880_pmx_func rgmii_func[] = { FUNC("rgmii", 0, 40, 12) }; static struct rt2880_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) }; static struct rt2880_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) }; static struct rt2880_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) }; +static struct rt2880_pmx_func rt3352_cs1_func[] = { + FUNC("spi_cs1", 0, 45, 1), + FUNC("wdg_cs1", 1, 45, 1), +}; static struct rt2880_pmx_group rt3050_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C), @@ -75,6 +79,7 @@ static struct rt2880_pmx_group rt3352_pinmux_data[] = { GRP("lna", rt3352_lna_func, 1, RT3352_GPIO_MODE_LNA), GRP("pa", rt3352_pa_func, 1, RT3352_GPIO_MODE_PA), GRP("led", rt3352_led_func, 1, RT5350_GPIO_MODE_PHY_LED), + GRP("spi_cs1", rt3352_cs1_func, 2, RT5350_GPIO_MODE_SPI_CS1), { 0 } }; diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 2ed8e4990b7a..082541d33161 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -464,7 +464,7 @@ void ip22_be_interrupt(int irq) die_if_kernel("Oops", regs); force_sig(SIGBUS, current); } else if (debug_be_interrupt) - show_regs((struct pt_regs *)regs); + show_regs(regs); } static int ip28_be_handler(struct pt_regs *regs, int is_fixup) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 59133d0abc83..6f7bef052b7f 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -389,7 +389,6 @@ static void __init node_mem_init(cnodeid_t node) { unsigned long slot_firstpfn = slot_getbasepfn(node, 0); unsigned long slot_freepfn = node_getfirstfree(node); - unsigned long bootmap_size; unsigned long start_pfn, end_pfn; get_pfn_range_for_nid(node, &start_pfn, &end_pfn); @@ -400,7 +399,6 @@ static void __init node_mem_init(cnodeid_t node) __node_data[node] = __va(slot_freepfn << PAGE_SHIFT); memset(__node_data[node], 0, PAGE_SIZE); - NODE_DATA(node)->bdata = &bootmem_node_data[node]; NODE_DATA(node)->node_start_pfn = start_pfn; NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; @@ -409,12 +407,11 @@ static void __init node_mem_init(cnodeid_t node) slot_freepfn += PFN_UP(sizeof(struct pglist_data) + sizeof(struct hub_data)); - bootmap_size = init_bootmem_node(NODE_DATA(node), slot_freepfn, - start_pfn, end_pfn); free_bootmem_with_active_regions(node, end_pfn); - reserve_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT, - ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size, - BOOTMEM_DEFAULT); + + memblock_reserve(slot_firstpfn << PAGE_SHIFT, + ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT)); + sparse_memory_present_with_active_regions(node); } diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore new file mode 100644 index 000000000000..56d34ccccce4 --- /dev/null +++ b/arch/mips/tools/.gitignore @@ -0,0 +1 @@ +elf-entry diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile new file mode 100644 index 000000000000..3baee4bc6775 --- /dev/null +++ b/arch/mips/tools/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs-y := elf-entry +PHONY += elf-entry +elf-entry: $(obj)/elf-entry + @: diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c new file mode 100644 index 000000000000..adde79ce7fc0 --- /dev/null +++ b/arch/mips/tools/elf-entry.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <byteswap.h> +#include <elf.h> +#include <endian.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef be32toh +/* If libc provides [bl]e{32,64}toh() then we'll use them */ +#elif BYTE_ORDER == LITTLE_ENDIAN +# define be32toh(x) bswap_32(x) +# define le32toh(x) (x) +# define be64toh(x) bswap_64(x) +# define le64toh(x) (x) +#elif BYTE_ORDER == BIG_ENDIAN +# define be32toh(x) (x) +# define le32toh(x) bswap_32(x) +# define be64toh(x) (x) +# define le64toh(x) bswap_64(x) +#endif + +__attribute__((noreturn)) +static void die(const char *msg) +{ + fputs(msg, stderr); + exit(EXIT_FAILURE); +} + +int main(int argc, const char *argv[]) +{ + uint64_t entry; + size_t nread; + FILE *file; + union { + Elf32_Ehdr ehdr32; + Elf64_Ehdr ehdr64; + } hdr; + + if (argc != 2) + die("Usage: elf-entry <elf-file>\n"); + + file = fopen(argv[1], "r"); + if (!file) { + perror("Unable to open input file"); + return EXIT_FAILURE; + } + + nread = fread(&hdr, 1, sizeof(hdr), file); + if (nread != sizeof(hdr)) { + perror("Unable to read input file"); + return EXIT_FAILURE; + } + + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) + die("Input is not an ELF\n"); + + switch (hdr.ehdr32.e_ident[EI_CLASS]) { + case ELFCLASS32: + switch (hdr.ehdr32.e_ident[EI_DATA]) { + case ELFDATA2LSB: + entry = le32toh(hdr.ehdr32.e_entry); + break; + case ELFDATA2MSB: + entry = be32toh(hdr.ehdr32.e_entry); + break; + default: + die("Invalid ELF encoding\n"); + } + + /* Sign extend to form a canonical address */ + entry = (int64_t)(int32_t)entry; + break; + + case ELFCLASS64: + switch (hdr.ehdr32.e_ident[EI_DATA]) { + case ELFDATA2LSB: + entry = le64toh(hdr.ehdr64.e_entry); + break; + case ELFDATA2MSB: + entry = be64toh(hdr.ehdr64.e_entry); + break; + default: + die("Invalid ELF encoding\n"); + } + break; + + default: + die("Invalid ELF class\n"); + } + + printf("0x%016" PRIx64 "\n", entry); + return EXIT_SUCCESS; +} diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index f6d9182ef82a..70a1ab66d252 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -960,12 +960,11 @@ void __init txx9_sramc_init(struct resource *r) goto exit_put; err = sysfs_create_bin_file(&dev->dev.kobj, &dev->bindata_attr); if (err) { - device_unregister(&dev->dev); iounmap(dev->base); - kfree(dev); + device_unregister(&dev->dev); } return; exit_put: + iounmap(dev->base); put_device(&dev->dev); - return; } diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 3509fac10491..9f525ed70049 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -47,7 +47,7 @@ CHECKFLAGS += -D__NDS32_EB__ endif boot := arch/nds32/boot -core-$(BUILTIN_DTB) += $(boot)/dts/ +core-y += $(boot)/dts/ .PHONY: FORCE diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile index 8673a79dca9c..52c03e60b114 100644 --- a/arch/nios2/Makefile +++ b/arch/nios2/Makefile @@ -49,21 +49,13 @@ BOOT_TARGETS = vmImage zImage PHONY += $(BOOT_TARGETS) install KBUILD_IMAGE := $(nios2-boot)/vmImage -ifneq ($(CONFIG_NIOS2_DTB_SOURCE),"") - core-y += $(nios2-boot)/ -endif +core-y += $(nios2-boot)/dts/ all: vmImage archclean: $(Q)$(MAKE) $(clean)=$(nios2-boot) -%.dtb: | scripts - $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@ - -dtbs: - $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@ - $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@ @@ -76,5 +68,4 @@ define archhelp echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' echo ' install to $$(INSTALL_PATH)' - echo ' dtbs - Build device tree blobs for enabled boards' endef diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 2ba23a679732..37dfc7e584bc 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -31,27 +31,5 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE $(obj)/compressed/vmlinux: $(obj)/vmlinux.gz FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ -# Rule to build device tree blobs -DTB_SRC := $(patsubst "%",%,$(CONFIG_NIOS2_DTB_SOURCE)) - -# Make sure the generated dtb gets removed during clean -extra-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += system.dtb - -$(obj)/system.dtb: $(DTB_SRC) FORCE - $(call cmd,dtc) - -# Ensure system.dtb exists -$(obj)/linked_dtb.o: $(obj)/system.dtb - -obj-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += linked_dtb.o - -targets += $(dtb-y) - -# Rule to build device tree blobs with make command -$(obj)/%.dtb: $(src)/dts/%.dts FORCE - $(call if_changed_dep,dtc) - -$(obj)/dtbs: $(addprefix $(obj)/, $(dtb-y)) - install: sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" diff --git a/arch/nios2/boot/dts/Makefile b/arch/nios2/boot/dts/Makefile new file mode 100644 index 000000000000..a91a0b09be63 --- /dev/null +++ b/arch/nios2/boot/dts/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y := $(patsubst "%.dts",%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE)) + +dtstree := $(srctree)/$(src) +dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/nios2/boot/linked_dtb.S b/arch/nios2/boot/linked_dtb.S deleted file mode 100644 index 071f922db338..000000000000 --- a/arch/nios2/boot/linked_dtb.S +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (C) 2011 Thomas Chou <thomas@wytron.com.tw> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - */ -.section .dtb.init.rodata,"a" -.incbin "arch/nios2/boot/system.dtb" diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c index 93207718bb22..ccc1d2a15a0a 100644 --- a/arch/nios2/kernel/cpuinfo.c +++ b/arch/nios2/kernel/cpuinfo.c @@ -47,7 +47,7 @@ void __init setup_cpuinfo(void) const char *str; int len; - cpu = of_find_node_by_type(NULL, "cpu"); + cpu = of_get_cpu_node(0, NULL); if (!cpu) panic("%s: No CPU found in devicetree!\n", __func__); @@ -120,6 +120,8 @@ void __init setup_cpuinfo(void) cpuinfo.reset_addr = fcpu(cpu, "altr,reset-addr"); cpuinfo.exception_addr = fcpu(cpu, "altr,exception-addr"); cpuinfo.fast_tlb_miss_exc_addr = fcpu(cpu, "altr,fast-tlb-miss-addr"); + + of_node_put(cpu); } #ifdef CONFIG_PROC_FS diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index ab88b6dd4679..54467d0085a1 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -214,12 +214,12 @@ static int __init nios2_timer_get_base_and_freq(struct device_node *np, { *base = of_iomap(np, 0); if (!*base) { - pr_crit("Unable to map reg for %s\n", np->name); + pr_crit("Unable to map reg for %pOFn\n", np); return -ENXIO; } if (of_property_read_u32(np, "clock-frequency", freq)) { - pr_crit("Unable to get %s clock frequency\n", np->name); + pr_crit("Unable to get %pOFn clock frequency\n", np); return -EINVAL; } diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 9d28ab14d139..e17fcd83120f 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -158,9 +158,8 @@ static struct device_node *setup_find_cpu_node(int cpu) { u32 hwid; struct device_node *cpun; - struct device_node *cpus = of_find_node_by_path("/cpus"); - for_each_available_child_of_node(cpus, cpun) { + for_each_of_cpu_node(cpun) { if (of_property_read_u32(cpun, "reg", &hwid)) continue; if (hwid == cpu) diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild new file mode 100644 index 000000000000..1625a06802ca --- /dev/null +++ b/arch/powerpc/Kbuild @@ -0,0 +1,16 @@ +subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror + +obj-y += kernel/ +obj-y += mm/ +obj-y += lib/ +obj-y += sysdev/ +obj-y += platforms/ +obj-y += math-emu/ +obj-y += crypto/ +obj-y += net/ + +obj-$(CONFIG_XMON) += xmon/ +obj-$(CONFIG_KVM) += kvm/ + +obj-$(CONFIG_PERF_EVENTS) += perf/ +obj-$(CONFIG_KEXEC_FILE) += purgatory/ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a80669209155..e84943d24e5c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,7 +137,7 @@ config PPC select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64 select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST @@ -180,6 +180,8 @@ config PPC select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_CBPF_JIT if !PPC64 + select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) + select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW @@ -188,6 +190,7 @@ config PPC select HAVE_EBPF_JIT if PPC64 select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC @@ -285,12 +288,10 @@ config ARCH_MAY_HAVE_PC_FDC config PPC_UDBG_16550 bool - default n config GENERIC_TBSYNC bool default y if PPC32 && SMP - default n config AUDIT_ARCH bool @@ -309,13 +310,11 @@ config EPAPR_BOOT bool help Used to allow a board to specify it wants an ePAPR compliant wrapper. - default n config DEFAULT_UIMAGE bool help Used to allow a board to specify it wants a uImage built by default - default n config ARCH_HIBERNATION_POSSIBLE bool @@ -329,11 +328,9 @@ config ARCH_SUSPEND_POSSIBLE config PPC_DCR_NATIVE bool - default n config PPC_DCR_MMIO bool - default n config PPC_DCR bool @@ -344,7 +341,6 @@ config PPC_OF_PLATFORM_PCI bool depends on PCI depends on PPC64 # not supported on 32 bits yet - default n config ARCH_SUPPORTS_DEBUG_PAGEALLOC depends on PPC32 || PPC_BOOK3S_64 @@ -447,14 +443,12 @@ config PPC_TRANSACTIONAL_MEM depends on SMP select ALTIVEC select VSX - default n ---help--- Support user-mode Transactional Memory on POWERPC. config LD_HEAD_STUB_CATCH bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT depends on PPC64 - default n help Very large kernels can cause linker branch stubs to be generated by code in head_64.S, which moves the head text sections out of their @@ -557,7 +551,6 @@ config RELOCATABLE config RELOCATABLE_TEST bool "Test relocatable kernel" depends on (PPC64 && RELOCATABLE) - default n help This runs the relocatable kernel at the address it was initially loaded at, which tends to be non-zero and therefore test the @@ -769,7 +762,6 @@ config PPC_SUBPAGE_PROT config PPC_COPRO_BASE bool - default n config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -892,7 +884,6 @@ config PPC_INDIRECT_PCI bool depends on PCI default y if 40x || 44x - default n config EISA bool @@ -989,7 +980,6 @@ source "drivers/pcmcia/Kconfig" config HAS_RAPIDIO bool - default n config RAPIDIO tristate "RapidIO support" @@ -1012,7 +1002,6 @@ endmenu config NONSTATIC_KERNEL bool - default n menu "Advanced setup" depends on PPC32 diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index fd63cd914a74..f4961fbcb48d 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -2,7 +2,6 @@ config PPC_DISABLE_WERROR bool "Don't build arch/powerpc code with -Werror" - default n help This option tells the compiler NOT to build the code under arch/powerpc with the -Werror flag (which means warnings @@ -56,7 +55,6 @@ config PPC_EMULATED_STATS config CODE_PATCHING_SELFTEST bool "Run self-tests of the code-patching code" depends on DEBUG_KERNEL - default n config JUMP_LABEL_FEATURE_CHECKS bool "Enable use of jump label for cpu/mmu_has_feature()" @@ -70,7 +68,6 @@ config JUMP_LABEL_FEATURE_CHECKS config JUMP_LABEL_FEATURE_CHECK_DEBUG bool "Do extra check on feature fixup calls" depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS - default n help This tries to catch incorrect usage of cpu_has_feature() and mmu_has_feature() in the code. @@ -80,16 +77,13 @@ config JUMP_LABEL_FEATURE_CHECK_DEBUG config FTR_FIXUP_SELFTEST bool "Run self-tests of the feature-fixup code" depends on DEBUG_KERNEL - default n config MSI_BITMAP_SELFTEST bool "Run self-tests of the MSI bitmap code" depends on DEBUG_KERNEL - default n config PPC_IRQ_SOFT_MASK_DEBUG bool "Include extra checks for powerpc irq soft masking" - default n config XMON bool "Include xmon kernel debugger" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 11a1acba164a..17be664dafa2 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -112,6 +112,13 @@ KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION) KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET) endif +cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard=tls +ifdef CONFIG_PPC64 +cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r13 +else +cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r2 +endif + LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) @@ -160,8 +167,17 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif +ifdef CONFIG_FUNCTION_TRACER +CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL - CC_FLAGS_FTRACE := -pg -mprofile-kernel +CC_FLAGS_FTRACE += -mprofile-kernel +endif +# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828 +ifneq ($(cc-name),clang) +CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog) +endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) @@ -229,16 +245,15 @@ ifdef CONFIG_6xx KBUILD_CFLAGS += -mcpu=powerpc endif -# Work around a gcc code-gen bug with -fno-omit-frame-pointer. -ifdef CONFIG_FUNCTION_TRACER -KBUILD_CFLAGS += -mno-sched-epilog -endif - cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 -cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 + +# When using '-many -mpower4' gas will first try and find a matching power4 +# mnemonic and failing that it will allow any valid mnemonic that GAS knows +# about. GCC will pass -many to GAS when assembling, clang does not. +cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) KBUILD_AFLAGS += $(cpu-as-y) @@ -258,18 +273,8 @@ head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o head-$(CONFIG_ALTIVEC) += arch/powerpc/kernel/vector.o head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += arch/powerpc/kernel/prom_init.o -core-y += arch/powerpc/kernel/ \ - arch/powerpc/mm/ \ - arch/powerpc/lib/ \ - arch/powerpc/sysdev/ \ - arch/powerpc/platforms/ \ - arch/powerpc/math-emu/ \ - arch/powerpc/crypto/ \ - arch/powerpc/net/ -core-$(CONFIG_XMON) += arch/powerpc/xmon/ -core-$(CONFIG_KVM) += arch/powerpc/kvm/ -core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/ -core-$(CONFIG_KEXEC_FILE) += arch/powerpc/purgatory/ +# See arch/powerpc/Kbuild for content of core part of the kernel +core-y += arch/powerpc/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ @@ -293,9 +298,6 @@ $(BOOT_TARGETS2): vmlinux bootwrapper_install: $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) -%.dtb: scripts - $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@) - # Used to create 'merged defconfigs' # To use it $(call) it with the first argument as the base defconfig # and the second argument as a space separated list of .config files to merge, @@ -400,40 +402,20 @@ archclean: archprepare: checkbin -# Use the file '.tmp_gas_check' for binutils tests, as gas won't output -# to stdout and these checks are run even on install targets. -TOUT := .tmp_gas_check +ifdef CONFIG_STACKPROTECTOR +prepare: stack_protector_prepare + +stack_protector_prepare: prepare0 +ifdef CONFIG_PPC64 + $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h)) +else + $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h)) +endif +endif -# Check gcc and binutils versions: -# - gcc-3.4 and binutils-2.14 are a fatal combination -# - Require gcc 4.0 or above on 64-bit -# - gcc-4.2.0 has issues compiling modules on 64-bit +# Check toolchain versions: +# - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin: - @if test "$(cc-name)" != "clang" \ - && test "$(cc-version)" = "0304" ; then \ - if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \ - echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \ - echo 'correctly with gcc-3.4 and your version of binutils.'; \ - echo '*** Please upgrade your binutils or downgrade your gcc'; \ - false; \ - fi ; \ - fi - @if test "$(cc-name)" != "clang" \ - && test "$(cc-version)" -lt "0400" \ - && test "x${CONFIG_PPC64}" = "xy" ; then \ - echo -n "Sorry, GCC v4.0 or above is required to build " ; \ - echo "the 64-bit powerpc kernel." ; \ - false ; \ - fi - @if test "$(cc-name)" != "clang" \ - && test "$(cc-fullversion)" = "040200" \ - && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \ - echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \ - echo 'kernel with modules enabled.' ; \ - echo -n '*** Please use a different GCC version or ' ; \ - echo 'disable kernel modules' ; \ - false ; \ - fi @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \ echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \ @@ -441,7 +423,3 @@ checkbin: echo -n '*** Please use a different binutils version.' ; \ false ; \ fi - - -CLEAN_FILES += $(TOUT) - diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index f92d0530ceb1..32034a0cc554 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -44,4 +44,5 @@ fdt_sw.c fdt_wip.c libfdt.h libfdt_internal.h +autoconf.h diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 0fb96c26136f..39354365f54a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -32,8 +32,8 @@ else endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -Os -msoft-float -pipe \ - -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ + -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ + -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ -D$(compress-y) ifdef CONFIG_PPC64_BOOT_WRAPPER @@ -197,9 +197,14 @@ $(obj)/empty.c: $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S $(Q)cp $< $@ +$(obj)/serial.c: $(obj)/autoconf.h + +$(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/% + $(Q)cp $< $@ + clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ $(zlib-decomp-) $(libfdt) $(libfdtheader) \ - empty.c zImage.coff.lds zImage.ps3.lds zImage.lds + autoconf.h empty.c zImage.coff.lds zImage.ps3.lds zImage.lds quiet_cmd_bootcc = BOOTCC $@ cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< @@ -304,9 +309,9 @@ image-$(CONFIG_PPC_ADDER875) += cuImage.adder875-uboot \ dtbImage.adder875-redboot # Board ports in arch/powerpc/platform/52xx/Kconfig -image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200 lite5200.dtb -image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b lite5200b.dtb -image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200 media5200.dtb +image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200 +image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b +image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200 # Board ports in arch/powerpc/platform/82xx/Kconfig image-$(CONFIG_MPC8272_ADS) += cuImage.mpc8272ads @@ -381,11 +386,11 @@ $(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperb $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@)) # dtbImage% - a dtbImage is a zImage with an embedded device tree blob -$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE - $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) +$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE + $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE - $(call if_changed,wrap,$*,,$(obj)/$*.dtb) +$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE + $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb) # This cannot be in the root of $(src) as the zImage rule always adds a $(obj) # prefix @@ -395,36 +400,33 @@ $(obj)/vmlinux.strip: vmlinux $(obj)/uImage: vmlinux $(wrapperbits) FORCE $(call if_changed,wrap,uboot) -$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) - -$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb) +$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) +$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb) -$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb) +$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) +$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb) -$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb) +$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz) -$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz) +$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb) -$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE - $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb) +$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz) -# Rule to build device tree blobs -$(obj)/%.dtb: $(src)/dts/%.dts FORCE - $(call if_changed_dep,dtc) +$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE + $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb) -$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE - $(call if_changed_dep,dtc) +# Needed for the above targets to work with dts/fsl/ files +$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb + @cp $< $@ # If there isn't a platform selected then just strip the vmlinux. ifeq (,$(image-y)) diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index dcf2f15e6797..32dfe6d083f3 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -47,8 +47,10 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .weak _zimage_start .globl _zimage_start + /* Clang appears to require the .weak directive to be after the symbol + * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ + .weak _zimage_start _zimage_start: .globl _zimage_start_lib _zimage_start_lib: diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile new file mode 100644 index 000000000000..fb335d05aae8 --- /dev/null +++ b/arch/powerpc/boot/dts/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +subdir-y += fsl + +dtstree := $(srctree)/$(src) +dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile new file mode 100644 index 000000000000..3bae982641e9 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +dtstree := $(srctree)/$(src) +dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts)) diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index 2a0c8b1bf147..2abc8e83b95e 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -5,6 +5,8 @@ #include <types.h> #include <string.h> +#define INT_MAX ((int)(~0U>>1)) + #include "of.h" typedef unsigned long uintptr_t; diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c index 0272570d02de..dfb199ef5b94 100644 --- a/arch/powerpc/boot/opal.c +++ b/arch/powerpc/boot/opal.c @@ -13,8 +13,6 @@ #include <libfdt.h> #include "../include/asm/opal-api.h" -#ifdef CONFIG_PPC64_BOOT_WRAPPER - /* Global OPAL struct used by opal-call.S */ struct opal { u64 base; @@ -101,9 +99,3 @@ int opal_console_init(void *devp, struct serial_console_data *scdp) return 0; } -#else -int opal_console_init(void *devp, struct serial_console_data *scdp) -{ - return -1; -} -#endif /* __powerpc64__ */ diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 48e3743faedf..f045f8494bf9 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -18,6 +18,7 @@ #include "stdio.h" #include "io.h" #include "ops.h" +#include "autoconf.h" static int serial_open(void) { diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 67c39f4acede..f686cc1eac0b 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -262,3 +262,4 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_HW is not set +CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 59e47ec85336..f71eddafb02f 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -112,3 +112,4 @@ CONFIG_PPC_EARLY_DEBUG=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_HW is not set +CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 6ab34e60495f..ef2ef98d3f28 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -44,6 +44,9 @@ CONFIG_PPC_MEMTRACE=y # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPU_IDLE=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m @@ -350,3 +353,4 @@ CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m +CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 5033e630afea..f2515674a1e2 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -40,6 +40,9 @@ CONFIG_PS3_LPM=m CONFIG_PPC_IBM_CELL_BLADE=y CONFIG_RTAS_FLASH=m CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPU_FREQ_PMAC64=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m @@ -365,3 +368,4 @@ CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m +CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 187e2f7c12c8..cf8d55f67272 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -171,3 +171,4 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_LZO=m +CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 0dd5cf7b566d..5e09a40cbcbf 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -325,3 +325,4 @@ CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m CONFIG_VHOST_NET=m +CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 6bd5e7261335..cfdd08897a06 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -3,20 +3,17 @@ CONFIG_ALTIVEC=y CONFIG_VSX=y CONFIG_NR_CPUS=2048 CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_KERNEL_XZ=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y +# CONFIG_CPU_ISOLATION is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=20 -CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_GZIP is not set # CONFIG_RD_BZIP2 is not set @@ -24,8 +21,14 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZO is not set # CONFIG_RD_LZ4 is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_EXPERT=y +# CONFIG_SGETMASK_SYSCALL is not set +# CONFIG_SYSFS_SYSCALL is not set +# CONFIG_SHMEM is not set +# CONFIG_AIO is not set CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_JUMP_LABEL=y CONFIG_STRICT_KERNEL_RWX=y CONFIG_MODULES=y @@ -35,7 +38,9 @@ CONFIG_MODULE_SIG_FORCE=y CONFIG_MODULE_SIG_SHA512=y CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_PPC_VAS is not set # CONFIG_PPC_PSERIES is not set +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_IDLE=y CONFIG_HZ_100=y @@ -48,8 +53,9 @@ CONFIG_NUMA=y CONFIG_PPC_64K_PAGES=y CONFIG_SCHED_SMT=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off" +CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet" # CONFIG_SECCOMP is not set +# CONFIG_PPC_MEM_KEYS is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -60,7 +66,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_IPV6 is not set CONFIG_DNS_RESOLVER=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -73,8 +78,10 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_NVME=m -CONFIG_EEPROM_AT24=y +CONFIG_NVME_MULTIPATH=y +CONFIG_EEPROM_AT24=m # CONFIG_CXL is not set +# CONFIG_OCXL is not set CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m CONFIG_BLK_DEV_SR_VENDOR=y @@ -85,7 +92,6 @@ CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_CXGB3_ISCSI=m CONFIG_SCSI_CXGB4_ISCSI=m CONFIG_SCSI_BNX2_ISCSI=m -CONFIG_BE2ISCSI=m CONFIG_SCSI_AACRAID=m CONFIG_MEGARAID_NEWGEN=y CONFIG_MEGARAID_MM=m @@ -102,7 +108,7 @@ CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y -CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI=m # CONFIG_ATA_SFF is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=m @@ -119,25 +125,72 @@ CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALACRITECH is not set CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_TIGON3=y +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +CONFIG_TIGON3=m CONFIG_BNX2X=m -CONFIG_CHELSIO_T1=y +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_CADENCE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set +CONFIG_CHELSIO_T1=m +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_CORTINA is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set CONFIG_BE2NET=m -CONFIG_S2IO=m -CONFIG_E100=m +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_HP is not set +# CONFIG_NET_VENDOR_HUAWEI is not set CONFIG_E1000=m -CONFIG_E1000E=m +CONFIG_IGB=m CONFIG_IXGB=m CONFIG_IXGBE=m +CONFIG_I40E=m +CONFIG_S2IO=m +# CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m +# CONFIG_MLX4_CORE_GEN2 is not set CONFIG_MLX5_CORE=m -CONFIG_MLX5_CORE_EN=y +# CONFIG_NET_VENDOR_MICREL is not set CONFIG_MYRI10GE=m +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_PACKET_ENGINE is not set CONFIG_QLGE=m CONFIG_NETXEN_NIC=m +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +# CONFIG_NET_VENDOR_REALTEK is not set +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set CONFIG_SFC=m +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +# CONFIG_NET_VENDOR_SOCIONEXT is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set +CONFIG_PHYLIB=y # CONFIG_USB_NET_DRIVERS is not set # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y @@ -149,39 +202,51 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_IPMI_HANDLER=y CONFIG_IPMI_DEVICE_INTERFACE=y CONFIG_IPMI_POWERNV=y +CONFIG_IPMI_WATCHDOG=y CONFIG_HW_RANDOM=y +CONFIG_TCG_TPM=y CONFIG_TCG_TIS_I2C_NUVOTON=y +CONFIG_I2C=y # CONFIG_I2C_COMPAT is not set CONFIG_I2C_CHARDEV=y # CONFIG_I2C_HELPER_AUTO is not set -CONFIG_DRM=y -CONFIG_DRM_RADEON=y +CONFIG_I2C_ALGOBIT=y +CONFIG_I2C_OPAL=m +CONFIG_PPS=y +CONFIG_SENSORS_IBMPOWERNV=m +CONFIG_DRM=m CONFIG_DRM_AST=m +CONFIG_FB=y CONFIG_FIRMWARE_EDID=y -CONFIG_FB_MODE_HELPERS=y -CONFIG_FB_OF=y -CONFIG_FB_MATROX=y -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G=y -# CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_GENERIC is not set # CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_HID_GENERIC=m +CONFIG_HID_A4TECH=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_ITE=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_LOGITECH=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y CONFIG_USB_HIDDEV=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_XHCI_HCD=y -CONFIG_USB_EHCI_HCD=y +CONFIG_USB=m +CONFIG_USB_XHCI_HCD=m +CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_HCD_PPC_OF is not set -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=y +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_STORAGE=m CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_OPAL=m CONFIG_RTC_DRV_GENERIC=m CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_PCI=m # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT4_FS=m CONFIG_EXT4_FS_POSIX_ACL=y @@ -195,10 +260,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y @@ -207,26 +271,24 @@ CONFIG_NLS_UTF8=y CONFIG_CRC16=y CONFIG_CRC_ITU_T=y CONFIG_LIBCRC32C=y +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_WQ_WATCHDOG=y -CONFIG_SCHEDSTATS=y +# CONFIG_SCHED_DEBUG is not set # CONFIG_FTRACE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_XMON=y CONFIG_XMON_DEFAULT=y -CONFIG_SECURITY=y -CONFIG_IMA=y -CONFIG_EVM=y +CONFIG_ENCRYPTED_KEYS=y # CONFIG_CRYPTO_ECHAINIV is not set -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_CMAC=y -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_ARC4=y -CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_HW is not set diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h index 3abcf98ed2e0..c607c5d835cc 100644 --- a/arch/powerpc/include/asm/accounting.h +++ b/arch/powerpc/include/asm/accounting.h @@ -15,8 +15,10 @@ struct cpu_accounting_data { /* Accumulated cputime values to flush on ticks*/ unsigned long utime; unsigned long stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME unsigned long utime_scaled; unsigned long stime_scaled; +#endif unsigned long gtime; unsigned long hardirq_time; unsigned long softirq_time; @@ -25,8 +27,10 @@ struct cpu_accounting_data { /* Internal counters */ unsigned long starttime; /* TB value snapshot */ unsigned long starttime_user; /* TB value on exit to usermode */ +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME unsigned long startspurr; /* SPURR value snapshot */ unsigned long utime_sspurr; /* ->user_time when ->startspurr set */ +#endif }; #endif diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index c55ba3b4873b..ec691d489656 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -63,7 +63,6 @@ void program_check_exception(struct pt_regs *regs); void alignment_exception(struct pt_regs *regs); void slb_miss_bad_addr(struct pt_regs *regs); void StackOverflow(struct pt_regs *regs); -void nonrecoverable_exception(struct pt_regs *regs); void kernel_fp_unavailable_exception(struct pt_regs *regs); void altivec_unavailable_exception(struct pt_regs *regs); void vsx_unavailable_exception(struct pt_regs *regs); @@ -78,6 +77,8 @@ void kernel_bad_stack(struct pt_regs *regs); void system_reset_exception(struct pt_regs *regs); void machine_check_exception(struct pt_regs *regs); void emulation_assist_interrupt(struct pt_regs *regs); +long do_slb_fault(struct pt_regs *regs, unsigned long ea); +void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 796d026da37e..c21d33704633 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -8,7 +8,97 @@ #include <asm/book3s/32/hash.h> /* And here we include common definitions */ -#include <asm/pte-common.h> + +#define _PAGE_KERNEL_RO 0 +#define _PAGE_KERNEL_ROX 0 +#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) +#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW) + +#define _PAGE_HPTEFLAGS _PAGE_HASHPTE + +#ifndef __ASSEMBLY__ + +static inline bool pte_user(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} +#endif /* __ASSEMBLY__ */ + +/* + * Location of the PFN in the PTE. Most 32-bit platforms use the same + * as _PAGE_SHIFT here (ie, naturally aligned). + * Platform who don't just pre-define the value so we don't override it here. + */ +#define PTE_RPN_SHIFT (PAGE_SHIFT) + +/* + * The mask covered by the RPN must be a ULL on 32-bit platforms with + * 64-bit PTEs. + */ +#ifdef CONFIG_PTE_64BIT +#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#else +#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#endif + +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes. + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \ + _PAGE_ACCESSED | _PAGE_SPECIAL) + +/* + * We define 2 sets of base prot bits, one for basic pages (ie, + * cacheable kernel and user pages) and one for non cacheable + * pages. We always set _PAGE_COHERENT when SMP is enabled or + * the processor might need it for DMA coherency. + */ +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) +#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) + +/* + * Permission masks used to generate the __P and __S table. + * + * Note:__pgprot is defined in arch/powerpc/include/asm/page.h + * + * Write permissions imply read permissions for now. + */ +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER) + +/* Permission masks used for kernel mappings */ +#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) +#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ + _PAGE_NO_CACHE | _PAGE_GUARDED) +#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) +#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) + +/* + * Protection used for kernel text. We want the debuggers to be able to + * set breakpoints anywhere, so don't write protect the kernel text + * on platforms where such control is possible. + */ +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ + defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) +#define PAGE_KERNEL_TEXT PAGE_KERNEL_X +#else +#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX +#endif + +/* Make modules code happy. We don't set RO yet */ +#define PAGE_KERNEL_EXEC PAGE_KERNEL_X + +/* Advertise special mapping type for AGP */ +#define PAGE_AGP (PAGE_KERNEL_NC) +#define HAVE_PAGE_AGP #define PTE_INDEX_SIZE PTE_SHIFT #define PMD_INDEX_SIZE 0 @@ -219,7 +309,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO); + pte_update(ptep, _PAGE_RW, 0); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -228,10 +318,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, int psize) { unsigned long set = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - unsigned long clr = ~pte_val(entry) & _PAGE_RO; + (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW); - pte_update(ptep, clr, set); + pte_update(ptep, 0, set); flush_tlb_page(vma, address); } @@ -286,7 +375,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} @@ -295,13 +384,28 @@ static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } -static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } +static inline bool pte_exec(pte_t pte) { return true; } static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; } +static inline bool pte_hw_valid(pte_t pte) +{ + return pte_val(pte) & _PAGE_PRESENT; +} + +static inline bool pte_hashpte(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_HASHPTE); +} + +static inline bool pte_ci(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_NO_CACHE); +} + /* * We only find page table entry in the last level * Hence no need for other accessors @@ -309,17 +413,14 @@ static inline int pte_present(pte_t pte) #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { - unsigned long pteval = pte_val(pte); /* * A read-only access is controlled by _PAGE_USER bit. * We have _PAGE_READ set for WRITE and EXECUTE */ - unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER; - - if (write) - need_pte_bits |= _PAGE_WRITE; + if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) + return false; - if ((pteval & need_pte_bits) != need_pte_bits) + if (write && !pte_write(pte)) return false; return true; @@ -348,6 +449,11 @@ static inline pte_t pte_wrprotect(pte_t pte) return __pte(pte_val(pte) & ~_PAGE_RW); } +static inline pte_t pte_exprotect(pte_t pte) +{ + return pte; +} + static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_DIRTY); @@ -358,6 +464,16 @@ static inline pte_t pte_mkold(pte_t pte) return __pte(pte_val(pte) & ~_PAGE_ACCESSED); } +static inline pte_t pte_mkexec(pte_t pte) +{ + return pte; +} + +static inline pte_t pte_mkpte(pte_t pte) +{ + return pte; +} + static inline pte_t pte_mkwrite(pte_t pte) { return __pte(pte_val(pte) | _PAGE_RW); @@ -383,6 +499,16 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } +static inline pte_t pte_mkprivileged(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_USER); +} + +static inline pte_t pte_mkuser(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_USER); +} + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 9a3798660cef..15bc16b1dc9c 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -66,7 +66,7 @@ static inline int hash__hugepd_ok(hugepd_t hpd) * if it is not a pte and have hugepd shift mask * set, then it is a hugepd directory pointer */ - if (!(hpdval & _PAGE_PTE) && + if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) && ((hpdval & HUGEPD_SHIFT_MASK) != 0)) return true; return false; diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index d52a51b2ce7b..247aff9cc6ba 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -18,6 +18,11 @@ #include <asm/book3s/64/hash-4k.h> #endif +/* Bits to set in a PMD/PUD/PGD entry valid bit*/ +#define HASH_PMD_VAL_BITS (0x8000000000000000UL) +#define HASH_PUD_VAL_BITS (0x8000000000000000UL) +#define HASH_PGD_VAL_BITS (0x8000000000000000UL) + /* * Size of EA range mapped by our pagetables. */ @@ -196,8 +201,7 @@ static inline void hpte_do_hugepage_flush(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -extern int hash__map_kernel_page(unsigned long ea, unsigned long pa, - unsigned long flags); +int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); extern int __meminit hash__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 50888388a359..5b0177733994 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -39,4 +39,7 @@ static inline bool gigantic_page_supported(void) } #endif +/* hugepd entry valid bit */ +#define HUGEPD_VAL_BITS (0x8000000000000000UL) + #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 66db23e2f4dc..12e522807f9f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -30,7 +30,7 @@ * SLB */ -#define SLB_NUM_BOLTED 3 +#define SLB_NUM_BOLTED 2 #define SLB_CACHE_ENTRIES 8 #define SLB_MIN_SIZE 32 @@ -499,6 +499,8 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); +extern void hash__setup_new_exec(void); + #ifdef CONFIG_PPC_PSERIES void hpte_init_pseries(void); #else @@ -507,11 +509,18 @@ static inline void hpte_init_pseries(void) { } extern void hpte_init_native(void); +struct slb_entry { + u64 esid; + u64 vsid; +}; + extern void slb_initialize(void); -extern void slb_flush_and_rebolt(void); +void slb_flush_and_restore_bolted(void); void slb_flush_all_realmode(void); void __slb_restore_bolted_realmode(void); void slb_restore_bolted_realmode(void); +void slb_save_contents(struct slb_entry *slb_ptr); +void slb_dump_contents(struct slb_entry *slb_ptr); extern void slb_vmalloc_update(void); extern void slb_set_size(u16 size); @@ -524,13 +533,9 @@ extern void slb_set_size(u16 size); * from mmu context id and effective segment id of the address. * * For user processes max context id is limited to MAX_USER_CONTEXT. - - * For kernel space, we use context ids 1-4 to map addresses as below: - * NOTE: each context only support 64TB now. - * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ] - * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ] - * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ] - * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ] + * more details in get_user_context + * + * For kernel space get_kernel_context * * The proto-VSIDs are then scrambled into real VSIDs with the * multiplicative hash: @@ -571,6 +576,21 @@ extern void slb_set_size(u16 size); #define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1) /* + * Now certain config support MAX_PHYSMEM more than 512TB. Hence we will need + * to use more than one context for linear mapping the kernel. + * For vmalloc and memmap, we use just one context with 512TB. With 64 byte + * struct page size, we need ony 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)). + */ +#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT) +#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT)) +#else +#define MAX_KERNEL_CTX_CNT 1 +#endif + +#define MAX_VMALLOC_CTX_CNT 1 +#define MAX_MEMMAP_CTX_CNT 1 + +/* * 256MB segment * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts @@ -580,12 +600,13 @@ extern void slb_set_size(u16 size); * We also need to avoid the last segment of the last context, because that * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 * because of the modulo operation in vsid scramble. + * + * We add one extra context to MIN_USER_CONTEXT so that we can map kernel + * context easily. The +1 is to map the unused 0xe region mapping. */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) -#define MIN_USER_CONTEXT (5) - -/* Would be nice to use KERNEL_REGION_ID here */ -#define KERNEL_REGION_CONTEXT_OFFSET (0xc - 1) +#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ + MAX_MEMMAP_CTX_CNT + 2) /* * For platforms that support on 65bit VA we limit the context bits @@ -746,6 +767,39 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, } /* + * For kernel space, we use context ids as below + * below. Range is 512TB per context. + * + * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff] + * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff] + * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff] + * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff] + + * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ] + * 0x00006 - Not used - Can map 0xe000000000000000 range. + * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ] + * + * So we can compute the context from the region (top nibble) by + * subtracting 11, or 0xc - 1. + */ +static inline unsigned long get_kernel_context(unsigned long ea) +{ + unsigned long region_id = REGION_ID(ea); + unsigned long ctx; + /* + * For linear mapping we do support multiple context + */ + if (region_id == KERNEL_REGION_ID) { + /* + * We already verified ea to be not beyond the addr limit. + */ + ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT); + } else + ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT; + return ctx; +} + +/* * This is only valid for addresses >= PAGE_OFFSET */ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) @@ -755,20 +809,7 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) if (!is_kernel_addr(ea)) return 0; - /* - * For kernel space, we use context ids 1-4 to map the address space as - * below: - * - * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ] - * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ] - * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ] - * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ] - * - * So we can compute the context from the region (top nibble) by - * subtracting 11, or 0xc - 1. - */ - context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET; - + context = get_kernel_context(ea); return get_vsid(context, ea, ssize); } diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 9c8c669a6b6a..6328857f259f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -208,7 +208,7 @@ extern void radix_init_pseries(void); static inline void radix_init_pseries(void) { }; #endif -static inline int get_ea_context(mm_context_t *ctx, unsigned long ea) +static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; @@ -223,7 +223,7 @@ static inline int get_ea_context(mm_context_t *ctx, unsigned long ea) static inline unsigned long get_user_vsid(mm_context_t *ctx, unsigned long ea, int ssize) { - unsigned long context = get_ea_context(ctx, ea); + unsigned long context = get_user_context(ctx, ea); return get_vsid(context, ea, ssize); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index d7ee249d6890..e3d4dd4ae2fa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -10,6 +10,9 @@ * * Defined in such a way that we can optimize away code block at build time * if CONFIG_HUGETLB_PAGE=n. + * + * returns true for pmd migration entries, THP, devmap, hugetlb + * But compile time dependent on CONFIG_HUGETLB_PAGE */ static inline int pmd_huge(pmd_t pmd) { diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index fc69eda5d588..c4a726c10af5 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -14,10 +14,6 @@ */ #define _PAGE_BIT_SWAP_TYPE 0 -#define _PAGE_NA 0 -#define _PAGE_RO 0 -#define _PAGE_USER 0 - #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ @@ -123,10 +119,6 @@ #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) /* - * No page size encoding in the linux PTE - */ -#define _PAGE_PSIZE 0 -/* * _PAGE_CHG_MASK masks of bits that are to be preserved across * pgprot changes */ @@ -137,19 +129,12 @@ #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* - * Mask of bits returned by pte_pgprot() - */ -#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ - H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ - _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ - _PAGE_SOFT_DIRTY | H_PTE_PKEY) -/* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable * pages. We always set _PAGE_COHERENT when SMP is enabled or * the processor might need it for DMA coherency. */ -#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) #define _PAGE_BASE (_PAGE_BASE_NC) /* Permission masks used to generate the __P and __S table, @@ -159,8 +144,6 @@ * Write permissions imply read permissions for now (we could make write-only * pages on BookE but we don't bother for now). Execute permission control is * possible on platforms that define _PAGE_EXEC - * - * Note due to the way vm flags are laid out, the bits are XWR */ #define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) @@ -170,24 +153,6 @@ #define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) #define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_X -#define __P101 PAGE_READONLY_X -#define __P110 PAGE_COPY_X -#define __P111 PAGE_COPY_X - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_X -#define __S101 PAGE_READONLY_X -#define __S110 PAGE_SHARED_X -#define __S111 PAGE_SHARED_X - /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ @@ -520,7 +485,11 @@ static inline int pte_special(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL)); } -static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } +static inline bool pte_exec(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC)); +} + #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline bool pte_soft_dirty(pte_t pte) @@ -530,12 +499,12 @@ static inline bool pte_soft_dirty(pte_t pte) static inline pte_t pte_mksoft_dirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY)); } static inline pte_t pte_clear_soft_dirty(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY)); } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ @@ -556,7 +525,7 @@ static inline pte_t pte_mk_savedwrite(pte_t pte) */ VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) != cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED)); - return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED)); } #define pte_clear_savedwrite pte_clear_savedwrite @@ -566,14 +535,14 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) * Used by KSM subsystem to make a protnone pte readonly. */ VM_BUG_ON(!pte_protnone(pte)); - return __pte(pte_val(pte) | _PAGE_PRIVILEGED); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED)); } #else #define pte_clear_savedwrite pte_clear_savedwrite static inline pte_t pte_clear_savedwrite(pte_t pte) { VM_WARN_ON(1); - return __pte(pte_val(pte) & ~_PAGE_WRITE); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE)); } #endif /* CONFIG_NUMA_BALANCING */ @@ -588,6 +557,11 @@ static inline int pte_present(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)); } +static inline bool pte_hw_valid(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); +} + #ifdef CONFIG_PPC_MEM_KEYS extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); #else @@ -597,25 +571,22 @@ static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute) } #endif /* CONFIG_PPC_MEM_KEYS */ +static inline bool pte_user(pte_t pte) +{ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED)); +} + #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { - unsigned long pteval = pte_val(pte); - /* Also check for pte_user */ - unsigned long clear_pte_bits = _PAGE_PRIVILEGED; /* * _PAGE_READ is needed for any access and will be * cleared for PROT_NONE */ - unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ; - - if (write) - need_pte_bits |= _PAGE_WRITE; - - if ((pteval & need_pte_bits) != need_pte_bits) + if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte)) return false; - if ((pteval & clear_pte_bits) == clear_pte_bits) + if (write && !pte_write(pte)) return false; return arch_pte_access_permitted(pte_val(pte), write, 0); @@ -644,17 +615,32 @@ static inline pte_t pte_wrprotect(pte_t pte) { if (unlikely(pte_savedwrite(pte))) return pte_clear_savedwrite(pte); - return __pte(pte_val(pte) & ~_PAGE_WRITE); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE)); +} + +static inline pte_t pte_exprotect(pte_t pte) +{ + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC)); } static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_DIRTY); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY)); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_ACCESSED); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED)); +} + +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC)); +} + +static inline pte_t pte_mkpte(pte_t pte) +{ + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE)); } static inline pte_t pte_mkwrite(pte_t pte) @@ -662,22 +648,22 @@ static inline pte_t pte_mkwrite(pte_t pte) /* * write implies read, hence set both */ - return __pte(pte_val(pte) | _PAGE_RW); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW)); } static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY)); } static inline pte_t pte_mkyoung(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED)); } static inline pte_t pte_mkspecial(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL)); } static inline pte_t pte_mkhuge(pte_t pte) @@ -687,7 +673,17 @@ static inline pte_t pte_mkhuge(pte_t pte) static inline pte_t pte_mkdevmap(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP)); +} + +static inline pte_t pte_mkprivileged(pte_t pte) +{ + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED)); +} + +static inline pte_t pte_mkuser(pte_t pte) +{ + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED)); } /* @@ -706,12 +702,8 @@ static inline int pte_devmap(pte_t pte) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { /* FIXME!! check whether this need to be a conditional */ - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); -} - -static inline bool pte_user(pte_t pte) -{ - return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED)); + return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) | + cpu_to_be64(pgprot_val(newprot))); } /* Encode and de-code a swap entry */ @@ -742,6 +734,8 @@ static inline bool pte_user(pte_t pte) */ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE }) #define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE) +#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd))) +#define __swp_entry_to_pmd(x) (pte_pmd(__swp_entry_to_pte(x))) #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE)) @@ -752,7 +746,7 @@ static inline bool pte_user(pte_t pte) #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); + return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY)); } static inline bool pte_swp_soft_dirty(pte_t pte) @@ -762,7 +756,7 @@ static inline bool pte_swp_soft_dirty(pte_t pte) static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY); + return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY)); } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ @@ -851,10 +845,10 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot) */ static inline bool pte_ci(pte_t pte) { - unsigned long pte_v = pte_val(pte); + __be64 pte_v = pte_raw(pte); - if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) || - ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)) + if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) || + ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT))) return true; return false; } @@ -876,8 +870,16 @@ static inline int pmd_none(pmd_t pmd) static inline int pmd_present(pmd_t pmd) { + /* + * A pmd is considerent present if _PAGE_PRESENT is set. + * We also need to consider the pmd present which is marked + * invalid during a split. Hence we look for _PAGE_INVALID + * if we find _PAGE_PRESENT cleared. + */ + if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) + return true; - return !pmd_none(pmd); + return false; } static inline int pmd_bad(pmd_t pmd) @@ -904,7 +906,7 @@ static inline int pud_none(pud_t pud) static inline int pud_present(pud_t pud) { - return !pud_none(pud); + return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT)); } extern struct page *pud_page(pud_t pud); @@ -951,7 +953,7 @@ static inline int pgd_none(pgd_t pgd) static inline int pgd_present(pgd_t pgd) { - return !pgd_none(pgd); + return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); } static inline pte_t pgd_pte(pgd_t pgd) @@ -1021,17 +1023,16 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -static inline int map_kernel_page(unsigned long ea, unsigned long pa, - unsigned long flags) +static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { if (radix_enabled()) { #if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM) unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift; WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE"); #endif - return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE); + return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE); } - return hash__map_kernel_page(ea, pa, flags); + return hash__map_kernel_page(ea, pa, prot); } static inline int __meminit vmemmap_create_mapping(unsigned long start, @@ -1083,6 +1084,12 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd) #define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd)) #define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))) #define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))) + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))) +#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd)) +#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))) +#endif #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ #ifdef CONFIG_NUMA_BALANCING @@ -1128,6 +1135,10 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set); } +/* + * returns true for pmd migration entries, THP, devmap, hugetlb + * But compile time dependent on THP config + */ static inline int pmd_large(pmd_t pmd) { return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); @@ -1162,8 +1173,22 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } +/* + * Only returns true for a THP. False for pmd migration entry. + * We also need to return true when we come across a pte that + * in between a thp split. While splitting THP, we mark the pmd + * invalid (pmdp_invalidate()) before we set it with pte page + * address. A pmd_trans_huge() check against a pmd entry during that time + * should return true. + * We should not call this on a hugetlb entry. We should check for HugeTLB + * entry using vma->vm_flags + * The page table walk rule is explained in Documentation/vm/transhuge.rst + */ static inline int pmd_trans_huge(pmd_t pmd) { + if (!pmd_present(pmd)) + return false; + if (radix_enabled()) return radix__pmd_trans_huge(pmd); return hash__pmd_trans_huge(pmd); diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index d71a90924f3b..deb99fd6e060 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -23,11 +23,13 @@ extern int threads_per_core; extern int threads_per_subcore; extern int threads_shift; +extern bool has_big_cores; extern cpumask_t threads_core_mask; #else #define threads_per_core 1 #define threads_per_subcore 1 #define threads_shift 0 +#define has_big_cores 0 #define threads_core_mask (*get_cpu_mask(0)) #endif diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 133672744b2e..ae73dc8da2d4 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -61,7 +61,6 @@ static inline void arch_vtime_task_switch(struct task_struct *prev) struct cpu_accounting_data *acct0 = get_accounting(prev); acct->starttime = acct0->starttime; - acct->startspurr = acct0->startspurr; } #endif diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index ce242b9ea8c6..7c1d8e74b25d 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -99,4 +99,9 @@ void __init walk_drmem_lmbs_early(unsigned long node, void (*func)(struct drmem_lmb *, const __be32 **)); #endif +static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) +{ + lmb->aa_index = 0xffffffff; +} + #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 219637ea69a1..8b596d096ebe 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -43,7 +43,6 @@ struct pci_dn; #define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ #define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ #define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ -#define EEH_POSTPONED_PROBE 0x80 /* Powernv may postpone device probe */ /* * Delay for PE reset, all in ms @@ -99,13 +98,13 @@ struct eeh_pe { atomic_t pass_dev_cnt; /* Count of passed through devs */ struct eeh_pe *parent; /* Parent PE */ void *data; /* PE auxillary data */ - struct list_head child_list; /* Link PE to the child list */ - struct list_head edevs; /* Link list of EEH devices */ - struct list_head child; /* Child PEs */ + struct list_head child_list; /* List of PEs below this PE */ + struct list_head child; /* Memb. child_list/eeh_phb_pe */ + struct list_head edevs; /* List of eeh_dev in this PE */ }; #define eeh_pe_for_each_dev(pe, edev, tmp) \ - list_for_each_entry_safe(edev, tmp, &pe->edevs, list) + list_for_each_entry_safe(edev, tmp, &pe->edevs, entry) #define eeh_for_each_pe(root, pe) \ for (pe = root; pe; pe = eeh_pe_next(pe, root)) @@ -142,13 +141,12 @@ struct eeh_dev { int aer_cap; /* Saved AER capability */ int af_cap; /* Saved AF capability */ struct eeh_pe *pe; /* Associated PE */ - struct list_head list; /* Form link list in the PE */ - struct list_head rmv_list; /* Record the removed edevs */ + struct list_head entry; /* Membership in eeh_pe.edevs */ + struct list_head rmv_entry; /* Membership in rmv_list */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ bool in_error; /* Error flag for edev */ struct pci_dev *physfn; /* Associated SRIOV PF */ - struct pci_bus *bus; /* PCI bus for partial hotplug */ }; static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev) @@ -207,9 +205,8 @@ struct eeh_ops { void* (*probe)(struct pci_dn *pdn, void *data); int (*set_option)(struct eeh_pe *pe, int option); int (*get_pe_addr)(struct eeh_pe *pe); - int (*get_state)(struct eeh_pe *pe, int *state); + int (*get_state)(struct eeh_pe *pe, int *delay); int (*reset)(struct eeh_pe *pe, int option); - int (*wait_state)(struct eeh_pe *pe, int max_wait); int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); int (*configure_bridge)(struct eeh_pe *pe); int (*err_inject)(struct eeh_pe *pe, int type, int func, @@ -243,11 +240,7 @@ static inline bool eeh_has_flag(int flag) static inline bool eeh_enabled(void) { - if (eeh_has_flag(EEH_FORCE_DISABLED) || - !eeh_has_flag(EEH_ENABLED)) - return false; - - return true; + return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED); } static inline void eeh_serialize_lock(unsigned long *flags) @@ -270,6 +263,7 @@ typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag); typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); +int eeh_wait_state(struct eeh_pe *pe, int max_wait); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); struct eeh_pe *eeh_pe_get(struct pci_controller *phb, diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h new file mode 100644 index 000000000000..62fd24739852 --- /dev/null +++ b/arch/powerpc/include/asm/error-injection.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _ASM_ERROR_INJECTION_H +#define _ASM_ERROR_INJECTION_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <asm/ptrace.h> +#include <asm-generic/error-injection.h> + +void override_function_with_return(struct pt_regs *regs); + +#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index a86feddddad0..3b4767ed3ec5 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -61,14 +61,6 @@ #define MAX_MCE_DEPTH 4 /* - * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR - * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole - * in the save area so it's not necessary to overlap them. Could be used - * for future savings though if another 4 byte register was to be saved. - */ -#define EX_LR EX_DAR - -/* * EX_R3 is only used by the bad_stack handler. bad_stack reloads and * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap * with EX_DAR. @@ -236,11 +228,10 @@ * PPR save/restore macros used in exceptions_64s.S * Used for P7 or later processors */ -#define SAVE_PPR(area, ra, rb) \ +#define SAVE_PPR(area, ra) \ BEGIN_FTR_SECTION_NESTED(940) \ - ld ra,PACACURRENT(r13); \ - ld rb,area+EX_PPR(r13); /* Read PPR from paca */ \ - std rb,TASKTHREADPPR(ra); \ + ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \ + std ra,_PPR(r1); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) #define RESTORE_PPR_PACA(area, ra) \ @@ -508,7 +499,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) 3: EXCEPTION_PROLOG_COMMON_1(); \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ - SAVE_PPR(area, r9, r10); \ + SAVE_PPR(area, r9); \ 4: EXCEPTION_PROLOG_COMMON_2(area) \ EXCEPTION_PROLOG_COMMON_3(n) \ ACCOUNT_STOLEN_TIME diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 7a051bd21f87..00bc42d95679 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,8 @@ #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) #define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000) +#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000) +#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000) #ifndef __ASSEMBLY__ @@ -69,7 +71,8 @@ enum { FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | - FW_FEATURE_DRC_INFO, + FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE | + FW_FEATURE_PAPR_SCM, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 41cc15c14eee..b9fbed84ddca 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -72,7 +72,7 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { - map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags)); + map_kernel_page(fix_to_virt(idx), phys, flags); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 45e8789bb770..33a4fc891947 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -278,6 +278,7 @@ #define H_COP 0x304 #define H_GET_MPP_X 0x314 #define H_SET_MODE 0x31C +#define H_BLOCK_REMOVE 0x328 #define H_CLEAR_HPT 0x358 #define H_REQUEST_VMC 0x360 #define H_RESIZE_HPT_PREPARE 0x36C @@ -295,7 +296,15 @@ #define H_INT_ESB 0x3C8 #define H_INT_SYNC 0x3CC #define H_INT_RESET 0x3D0 -#define MAX_HCALL_OPCODE H_INT_RESET +#define H_SCM_READ_METADATA 0x3E4 +#define H_SCM_WRITE_METADATA 0x3E8 +#define H_SCM_BIND_MEM 0x3EC +#define H_SCM_UNBIND_MEM 0x3F0 +#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4 +#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8 +#define H_SCM_MEM_QUERY 0x3FC +#define H_SCM_BLOCK_CLEAR 0x400 +#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index e0331e754568..3ef40b703c4a 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -3,6 +3,9 @@ #ifdef __KERNEL__ #define ARCH_HAS_IOREMAP_WC +#ifdef CONFIG_PPC32 +#define ARCH_HAS_IOREMAP_WT +#endif /* * This program is free software; you can redistribute it and/or @@ -108,25 +111,6 @@ extern bool isa_io_special; #define IO_SET_SYNC_FLAG() #endif -/* gcc 4.0 and older doesn't have 'Z' constraint */ -#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0) -#define DEF_MMIO_IN_X(name, size, insn) \ -static inline u##size name(const volatile u##size __iomem *addr) \ -{ \ - u##size ret; \ - __asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync" \ - : "=r" (ret) : "r" (addr), "m" (*addr) : "memory"); \ - return ret; \ -} - -#define DEF_MMIO_OUT_X(name, size, insn) \ -static inline void name(volatile u##size __iomem *addr, u##size val) \ -{ \ - __asm__ __volatile__("sync;"#insn" %1,0,%2" \ - : "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \ - IO_SET_SYNC_FLAG(); \ -} -#else /* newer gcc */ #define DEF_MMIO_IN_X(name, size, insn) \ static inline u##size name(const volatile u##size __iomem *addr) \ { \ @@ -143,7 +127,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \ : "=Z" (*addr) : "r" (val) : "memory"); \ IO_SET_SYNC_FLAG(); \ } -#endif #define DEF_MMIO_IN_D(name, size, insn) \ static inline u##size name(const volatile u##size __iomem *addr) \ @@ -746,6 +729,10 @@ static inline void iosync(void) * * * ioremap_wc enables write combining * + * * ioremap_wt enables write through + * + * * ioremap_coherent maps coherent cached memory + * * * iounmap undoes such a mapping and can be hooked * * * __ioremap_at (and the pending __iounmap_at) are low level functions to @@ -767,6 +754,8 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size); extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size, unsigned long flags); extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size); +void __iomem *ioremap_wt(phys_addr_t address, unsigned long size); +void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); #define ioremap_nocache(addr, size) ioremap((addr), (size)) #define ioremap_uc(addr, size) ioremap((addr), (size)) #define ioremap_cache(addr, size) \ @@ -777,12 +766,12 @@ extern void iounmap(volatile void __iomem *addr); extern void __iomem *__ioremap(phys_addr_t, unsigned long size, unsigned long flags); extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, - unsigned long flags, void *caller); + pgprot_t prot, void *caller); extern void __iounmap(volatile void __iomem *addr); extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, - unsigned long size, unsigned long flags); + unsigned long size, pgprot_t prot); extern void __iounmap_at(void *ea, unsigned long size); /* diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h index 9db24e77b9f4..a9e098a3b881 100644 --- a/arch/powerpc/include/asm/kgdb.h +++ b/arch/powerpc/include/asm/kgdb.h @@ -26,9 +26,12 @@ #define BREAK_INSTR_SIZE 4 #define BUFMAX ((NUMREGBYTES * 2) + 512) #define OUTBUFMAX ((NUMREGBYTES * 2) + 512) + +#define BREAK_INSTR 0x7d821008 /* twge r2, r2 */ + static inline void arch_kgdb_breakpoint(void) { - asm(".long 0x7d821008"); /* twge r2, r2 */ + asm(stringify_in_c(.long BREAK_INSTR)); } #define CACHE_FLUSH_IS_SAFE 1 #define DBG_MAX_REG_NUM 70 diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index a47de82fb8e2..8311869005fa 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -35,7 +35,7 @@ struct machdep_calls { char *name; #ifdef CONFIG_PPC64 void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, - unsigned long flags, void *caller); + pgprot_t prot, void *caller); void (*iounmap)(volatile void __iomem *token); #ifdef CONFIG_PM @@ -108,6 +108,7 @@ struct machdep_calls { /* Early exception handlers called in realmode */ int (*hmi_exception_early)(struct pt_regs *regs); + long (*machine_check_early)(struct pt_regs *regs); /* Called during machine check exception to retrive fixup address. */ bool (*mce_check_early_recovery)(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 3a1226e9b465..a8b8903e1844 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -210,4 +210,7 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode); +#ifdef CONFIG_PPC_BOOK3S_64 +void flush_and_reload_slb(void); +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* __ASM_PPC64_MCE_H__ */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 13ea441ac531..eb20eb3b8fb0 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -309,6 +309,21 @@ static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) */ #define MMU_PAGE_COUNT 16 +/* + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 + */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ + defined (CONFIG_PPC_64K_PAGES) +#define MAX_PHYSMEM_BITS 51 +#else +#define MAX_PHYSMEM_BITS 46 +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/mmu.h> #else /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b694d6af1150..0381394a425b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -82,7 +82,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) { int context_id; - context_id = get_ea_context(&mm->context, ea); + context_id = get_user_context(&mm->context, ea); if (!context_id) return true; return false; diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index fad8ddd697ac..0abf2e7fd222 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -393,7 +393,14 @@ extern struct bus_type mpic_subsys; #define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */ /* Get the version of primary MPIC */ +#ifdef CONFIG_MPIC extern u32 fsl_mpic_primary_get_version(void); +#else +static inline u32 fsl_mpic_primary_get_version(void) +{ + return 0; +} +#endif /* Allocate the controller structure and setup the linux irq descs * for the range if interrupts passed in. No HW initialization is diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 6c82b9660c55..3ffb0ff5a038 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -128,14 +128,65 @@ extern int icache_44x_need_flush; #include <asm/nohash/32/pte-8xx.h> #endif -/* And here we include common definitions */ -#include <asm/pte-common.h> +/* + * Location of the PFN in the PTE. Most 32-bit platforms use the same + * as _PAGE_SHIFT here (ie, naturally aligned). + * Platform who don't just pre-define the value so we don't override it here. + */ +#ifndef PTE_RPN_SHIFT +#define PTE_RPN_SHIFT (PAGE_SHIFT) +#endif + +/* + * The mask covered by the RPN must be a ULL on 32-bit platforms with + * 64-bit PTEs. + */ +#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) +#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#else +#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#endif + +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes. + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) #ifndef __ASSEMBLY__ #define pte_clear(mm, addr, ptep) \ do { pte_update(ptep, ~0, 0); } while (0) +#ifndef pte_mkwrite +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} +#endif + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +#ifndef pte_wrprotect +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RW); +} +#endif + +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_EXEC); +} + #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD) #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK) @@ -244,7 +295,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO); + unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0))); + unsigned long set = pte_val(pte_wrprotect(__pte(0))); + + pte_update(ptep, clr, set); } static inline void __ptep_set_access_flags(struct vm_area_struct *vma, @@ -252,9 +306,10 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, int psize) { - unsigned long set = pte_val(entry) & - (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA); + pte_t pte_set = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0))))); + pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0))))); + unsigned long set = pte_val(entry) & pte_val(pte_set); + unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr); pte_update(ptep, clr, set); @@ -317,7 +372,7 @@ static inline int pte_young(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h index bb4b3a4b92a0..661f4599f2fc 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-40x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h @@ -50,13 +50,56 @@ #define _PAGE_EXEC 0x200 /* hardware: EX permission */ #define _PAGE_ACCESSED 0x400 /* software: R: page referenced */ +/* No page size encoding in the linux PTE */ +#define _PAGE_PSIZE 0 + +/* cache related flags non existing on 40x */ +#define _PAGE_COHERENT 0 + +#define _PAGE_KERNEL_RO 0 +#define _PAGE_KERNEL_ROX _PAGE_EXEC +#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) +#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC) + #define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */ +#define _PMD_PRESENT_MASK _PMD_PRESENT #define _PMD_BAD 0x802 #define _PMD_SIZE_4M 0x0c0 #define _PMD_SIZE_16M 0x0e0 +#define _PMD_USER 0 + +#define _PTE_NONE_MASK 0 /* Until my rework is finished, 40x still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) +#define _PAGE_BASE (_PAGE_BASE_NC) + +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) + +#ifndef __ASSEMBLY__ +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE)); +} + +#define pte_wrprotect pte_wrprotect + +static inline pte_t pte_mkclean(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE)); +} + +#define pte_mkclean pte_mkclean +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h index f812c0272364..78bc304f750e 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-44x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h @@ -85,14 +85,44 @@ #define _PAGE_NO_CACHE 0x00000400 /* H: I bit */ #define _PAGE_WRITETHRU 0x00000800 /* H: W bit */ +/* No page size encoding in the linux PTE */ +#define _PAGE_PSIZE 0 + +#define _PAGE_KERNEL_RO 0 +#define _PAGE_KERNEL_ROX _PAGE_EXEC +#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) +#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) + /* TODO: Add large page lowmem mapping support */ #define _PMD_PRESENT 0 #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) +#define _PMD_USER 0 /* ERPN in a PTE never gets cleared, ignore it */ #define _PTE_NONE_MASK 0xffffffff00000000ULL +/* + * We define 2 sets of base prot bits, one for basic pages (ie, + * cacheable kernel and user pages) and one for non cacheable + * pages. We always set _PAGE_COHERENT when SMP is enabled or + * the processor might need it for DMA coherency. + */ +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) +#if defined(CONFIG_SMP) +#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) +#else +#define _PAGE_BASE (_PAGE_BASE_NC) +#endif + +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index f04cb46ae8a1..6bfe041ef59d 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -29,10 +29,10 @@ */ /* Definitions for 8xx embedded chips. */ -#define _PAGE_PRESENT 0x0001 /* Page is valid */ -#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */ -#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */ -#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/ +#define _PAGE_PRESENT 0x0001 /* V: Page is valid */ +#define _PAGE_NO_CACHE 0x0002 /* CI: cache inhibit */ +#define _PAGE_SH 0x0004 /* SH: No ASID (context) compare */ +#define _PAGE_SPS 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/ #define _PAGE_DIRTY 0x0100 /* C: page changed */ /* These 4 software bits must be masked out when the L2 entry is loaded @@ -46,18 +46,95 @@ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ +/* cache related flags non existing on 8xx */ +#define _PAGE_COHERENT 0 +#define _PAGE_WRITETHRU 0 + +#define _PAGE_KERNEL_RO (_PAGE_SH | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_SH | _PAGE_RO | _PAGE_EXEC) +#define _PAGE_KERNEL_RW (_PAGE_SH | _PAGE_DIRTY) +#define _PAGE_KERNEL_RWX (_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC) + #define _PMD_PRESENT 0x0001 +#define _PMD_PRESENT_MASK _PMD_PRESENT #define _PMD_BAD 0x0fd0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c #define _PMD_PAGE_512K 0x0004 #define _PMD_USER 0x0020 /* APG 1 */ +#define _PTE_NONE_MASK 0 + /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 #ifdef CONFIG_PPC_16K_PAGES -#define _PAGE_PSIZE _PAGE_HUGE +#define _PAGE_PSIZE _PAGE_SPS +#else +#define _PAGE_PSIZE 0 +#endif + +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) +#define _PAGE_BASE (_PAGE_BASE_NC) + +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) +#define PAGE_SHARED __pgprot(_PAGE_BASE) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC) + +#ifndef __ASSEMBLY__ +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RO); +} + +#define pte_wrprotect pte_wrprotect + +static inline int pte_write(pte_t pte) +{ + return !(pte_val(pte) & _PAGE_RO); +} + +#define pte_write pte_write + +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RO); +} + +#define pte_mkwrite pte_mkwrite + +static inline bool pte_user(pte_t pte) +{ + return !(pte_val(pte) & _PAGE_SH); +} + +#define pte_user pte_user + +static inline pte_t pte_mkprivileged(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SH); +} + +#define pte_mkprivileged pte_mkprivileged + +static inline pte_t pte_mkuser(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_SH); +} + +#define pte_mkuser pte_mkuser + +static inline pte_t pte_mkhuge(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SPS); +} + +#define pte_mkhuge pte_mkhuge #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h index d1ee24e9e137..0fc1bd42bb3e 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h +++ b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h @@ -31,11 +31,44 @@ #define _PAGE_WRITETHRU 0x00400 /* H: W bit */ #define _PAGE_SPECIAL 0x00800 /* S: Special page */ +#define _PAGE_KERNEL_RO 0 +#define _PAGE_KERNEL_ROX _PAGE_EXEC +#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW) +#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) + +/* No page size encoding in the linux PTE */ +#define _PAGE_PSIZE 0 + #define _PMD_PRESENT 0 #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) +#define _PMD_USER 0 + +#define _PTE_NONE_MASK 0 #define PTE_WIMGE_SHIFT (6) +/* + * We define 2 sets of base prot bits, one for basic pages (ie, + * cacheable kernel and user pages) and one for non cacheable + * pages. We always set _PAGE_COHERENT when SMP is enabled or + * the processor might need it for DMA coherency. + */ +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED) +#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC) +#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) +#else +#define _PAGE_BASE (_PAGE_BASE_NC) +#endif + +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 68283e632f04..67421f74efcf 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -89,11 +89,47 @@ * Include the PTE bits definitions */ #include <asm/nohash/pte-book3e.h> -#include <asm/pte-common.h> + +#define _PAGE_SAO 0 + +#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) + +/* + * _PAGE_CHG_MASK masks of bits that are to be preserved across + * pgprot changes. + */ +#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL) + +#define H_PAGE_4K_PFN 0 #ifndef __ASSEMBLY__ /* pte_clear moved to later in this file */ +static inline pte_t pte_mkwrite(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_RW); +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DIRTY); +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_ACCESSED); +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + return __pte(pte_val(pte) & ~_PAGE_RW); +} + +static inline pte_t pte_mkexec(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_EXEC); +} + #define PMD_BAD_BITS (PTE_TABLE_SIZE-1) #define PUD_BAD_BITS (PMD_TABLE_SIZE-1) @@ -328,8 +364,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) __pte((x).val) -extern int map_kernel_page(unsigned long ea, unsigned long pa, - unsigned long flags); +int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index b321c82b3624..70ff23974b59 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -8,18 +8,50 @@ #include <asm/nohash/32/pgtable.h> #endif +/* Permission masks used for kernel mappings */ +#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) +#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE) +#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ + _PAGE_NO_CACHE | _PAGE_GUARDED) +#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) +#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) + +/* + * Protection used for kernel text. We want the debuggers to be able to + * set breakpoints anywhere, so don't write protect the kernel text + * on platforms where such control is possible. + */ +#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ + defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) +#define PAGE_KERNEL_TEXT PAGE_KERNEL_X +#else +#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX +#endif + +/* Make modules code happy. We don't set RO yet */ +#define PAGE_KERNEL_EXEC PAGE_KERNEL_X + +/* Advertise special mapping type for AGP */ +#define PAGE_AGP (PAGE_KERNEL_NC) +#define HAVE_PAGE_AGP + #ifndef __ASSEMBLY__ /* Generic accessors to PTE bits */ +#ifndef pte_write static inline int pte_write(pte_t pte) { - return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; + return pte_val(pte) & _PAGE_RW; } +#endif static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; } -static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } +static inline bool pte_hashpte(pte_t pte) { return false; } +static inline bool pte_ci(pte_t pte) { return pte_val(pte) & _PAGE_NO_CACHE; } +static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; } #ifdef CONFIG_NUMA_BALANCING /* @@ -29,8 +61,7 @@ static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PA */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; + return pte_present(pte) && !pte_user(pte); } static inline int pmd_protnone(pmd_t pmd) @@ -44,6 +75,23 @@ static inline int pte_present(pte_t pte) return pte_val(pte) & _PAGE_PRESENT; } +static inline bool pte_hw_valid(pte_t pte) +{ + return pte_val(pte) & _PAGE_PRESENT; +} + +/* + * Don't just check for any non zero bits in __PAGE_USER, since for book3e + * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in + * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too. + */ +#ifndef pte_user +static inline bool pte_user(pte_t pte) +{ + return (pte_val(pte) & _PAGE_USER) == _PAGE_USER; +} +#endif + /* * We only find page table entry in the last level * Hence no need for other accessors @@ -77,53 +125,53 @@ static inline unsigned long pte_pfn(pte_t pte) { return pte_val(pte) >> PTE_RPN_SHIFT; } /* Generic modifiers for PTE bits */ -static inline pte_t pte_wrprotect(pte_t pte) +static inline pte_t pte_exprotect(pte_t pte) { - pte_basic_t ptev; - - ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE); - ptev |= _PAGE_RO; - return __pte(ptev); + return __pte(pte_val(pte) & ~_PAGE_EXEC); } +#ifndef pte_mkclean static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE)); + return __pte(pte_val(pte) & ~_PAGE_DIRTY); } +#endif static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_ACCESSED); } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pte_t pte_mkpte(pte_t pte) { - pte_basic_t ptev; - - ptev = pte_val(pte) & ~_PAGE_RO; - ptev |= _PAGE_RW; - return __pte(ptev); + return pte; } -static inline pte_t pte_mkdirty(pte_t pte) +static inline pte_t pte_mkspecial(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return __pte(pte_val(pte) | _PAGE_SPECIAL); } -static inline pte_t pte_mkyoung(pte_t pte) +#ifndef pte_mkhuge +static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); + return __pte(pte_val(pte)); } +#endif -static inline pte_t pte_mkspecial(pte_t pte) +#ifndef pte_mkprivileged +static inline pte_t pte_mkprivileged(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL); + return __pte(pte_val(pte) & ~_PAGE_USER); } +#endif -static inline pte_t pte_mkhuge(pte_t pte) +#ifndef pte_mkuser +static inline pte_t pte_mkuser(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_HUGE); + return __pte(pte_val(pte) | _PAGE_USER); } +#endif static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -197,6 +245,8 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre #if _PAGE_WRITETHRU != 0 #define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \ _PAGE_COHERENT | _PAGE_WRITETHRU)) +#else +#define pgprot_cached_wthru(prot) pgprot_noncached(prot) #endif #define pgprot_cached_noncoherent(prot) \ diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index 12730b81cd98..dd40d200f274 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -77,7 +77,48 @@ #define _PMD_PRESENT 0 #define _PMD_PRESENT_MASK (PAGE_MASK) #define _PMD_BAD (~PAGE_MASK) +#define _PMD_USER 0 +#else +#define _PTE_NONE_MASK 0 +#endif + +/* + * We define 2 sets of base prot bits, one for basic pages (ie, + * cacheable kernel and user pages) and one for non cacheable + * pages. We always set _PAGE_COHERENT when SMP is enabled or + * the processor might need it for DMA coherency. + */ +#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) +#if defined(CONFIG_SMP) +#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) +#else +#define _PAGE_BASE (_PAGE_BASE_NC) #endif +/* Permission masks used to generate the __P and __S table */ +#define PAGE_NONE __pgprot(_PAGE_BASE) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) + +#ifndef __ASSEMBLY__ +static inline pte_t pte_mkprivileged(pte_t pte) +{ + return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED); +} + +#define pte_mkprivileged pte_mkprivileged + +static inline pte_t pte_mkuser(pte_t pte) +{ + return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER); +} + +#define pte_mkuser pte_mkuser +#endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 8365353330b4..870fb7b239ea 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1050,6 +1050,7 @@ enum OpalSysCooling { enum { OPAL_REBOOT_NORMAL = 0, OPAL_REBOOT_PLATFORM_ERROR = 1, + OPAL_REBOOT_FULL_IPL = 2, }; /* Argument to OPAL_PCI_TCE_KILL */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ad4f16164619..e843bc5d1a0f 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -113,7 +113,13 @@ struct paca_struct { * on the linear mapping */ /* SLB related definitions */ u16 vmalloc_sllp; - u16 slb_cache_ptr; + u8 slb_cache_ptr; + u8 stab_rr; /* stab/slb round-robin counter */ +#ifdef CONFIG_DEBUG_VM + u8 in_kernel_slb_handler; +#endif + u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */ + u32 slb_kern_bitmap; u32 slb_cache[SLB_CACHE_ENTRIES]; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -160,7 +166,6 @@ struct paca_struct { */ struct task_struct *__current; /* Pointer to current */ u64 kstack; /* Saved Kernel stack addr */ - u64 stab_rr; /* stab/slb round-robin counter */ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ @@ -250,6 +255,15 @@ struct paca_struct { #ifdef CONFIG_PPC_PSERIES u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */ #endif /* CONFIG_PPC_PSERIES */ + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Capture SLB related old contents in MCE handler. */ + struct slb_entry *mce_faulty_slbs; + u16 slb_save_cache_ptr; +#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_STACKPROTECTOR + unsigned long canary; +#endif } ____cacheline_aligned; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 14c79a7dc855..9679b7519a35 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -20,6 +20,25 @@ struct mm_struct; #include <asm/nohash/pgtable.h> #endif /* !CONFIG_PPC_BOOK3S */ +/* Note due to the way vm flags are laid out, the bits are XWR */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_X +#define __P101 PAGE_READONLY_X +#define __P110 PAGE_COPY_X +#define __P111 PAGE_COPY_X + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_X +#define __S101 PAGE_READONLY_X +#define __S110 PAGE_SHARED_X +#define __S111 PAGE_SHARED_X + #ifndef __ASSEMBLY__ #include <asm/tlbflush.h> @@ -27,6 +46,16 @@ struct mm_struct; /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +/* + * Select all bits except the pfn + */ +static inline pgprot_t pte_pgprot(pte_t pte) +{ + unsigned long pte_flags; + + pte_flags = pte_val(pte) & ~PTE_RPN_MASK; + return __pgprot(pte_flags); +} /* * ZERO_PAGE is a global shared page that is always zero: used diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 726288048652..f67da277d652 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -58,6 +58,7 @@ void eeh_save_bars(struct eeh_dev *edev); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); +void eeh_pe_mark_isolated(struct eeh_pe *pe); void eeh_pe_state_clear(struct eeh_pe *pe, int state); void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state); void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 52fadded5c1e..7d04d60a39c9 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -32,9 +32,9 @@ /* Default SMT priority is set to 3. Use 11- 13bits to save priority. */ #define PPR_PRIORITY 3 #ifdef __ASSEMBLY__ -#define INIT_PPR (PPR_PRIORITY << 50) +#define DEFAULT_PPR (PPR_PRIORITY << 50) #else -#define INIT_PPR ((u64)PPR_PRIORITY << 50) +#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PPC64 */ @@ -273,6 +273,7 @@ struct thread_struct { #endif /* CONFIG_HAVE_HW_BREAKPOINT */ struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ + u8 load_slb; /* Ages out SLB preload cache entries */ u8 load_fp; #ifdef CONFIG_ALTIVEC u8 load_vec; @@ -341,7 +342,6 @@ struct thread_struct { * onwards. */ int dscr_inherit; - unsigned long ppr; /* used to save/restore SMT priority */ unsigned long tidr; #endif #ifdef CONFIG_PPC_BOOK3S_64 @@ -389,7 +389,6 @@ struct thread_struct { .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .addr_limit = KERNEL_DS, \ .fpexc_mode = 0, \ - .ppr = INIT_PPR, \ .fscr = FSCR_TAR | FSCR_EBB \ } #endif diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h deleted file mode 100644 index bef56141a549..000000000000 --- a/arch/powerpc/include/asm/pte-common.h +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Included from asm/pgtable-*.h only ! */ - -/* - * Some bits are only used on some cpu families... Make sure that all - * the undefined gets a sensible default - */ -#ifndef _PAGE_HASHPTE -#define _PAGE_HASHPTE 0 -#endif -#ifndef _PAGE_HWWRITE -#define _PAGE_HWWRITE 0 -#endif -#ifndef _PAGE_EXEC -#define _PAGE_EXEC 0 -#endif -#ifndef _PAGE_ENDIAN -#define _PAGE_ENDIAN 0 -#endif -#ifndef _PAGE_COHERENT -#define _PAGE_COHERENT 0 -#endif -#ifndef _PAGE_WRITETHRU -#define _PAGE_WRITETHRU 0 -#endif -#ifndef _PAGE_4K_PFN -#define _PAGE_4K_PFN 0 -#endif -#ifndef _PAGE_SAO -#define _PAGE_SAO 0 -#endif -#ifndef _PAGE_PSIZE -#define _PAGE_PSIZE 0 -#endif -/* _PAGE_RO and _PAGE_RW shall not be defined at the same time */ -#ifndef _PAGE_RO -#define _PAGE_RO 0 -#else -#define _PAGE_RW 0 -#endif - -#ifndef _PAGE_PTE -#define _PAGE_PTE 0 -#endif -/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */ -#ifndef _PAGE_PRIVILEGED -#define _PAGE_PRIVILEGED 0 -#else -#ifndef _PAGE_USER -#define _PAGE_USER 0 -#endif -#endif -#ifndef _PAGE_NA -#define _PAGE_NA 0 -#endif -#ifndef _PAGE_HUGE -#define _PAGE_HUGE 0 -#endif - -#ifndef _PMD_PRESENT_MASK -#define _PMD_PRESENT_MASK _PMD_PRESENT -#endif -#ifndef _PMD_USER -#define _PMD_USER 0 -#endif -#ifndef _PAGE_KERNEL_RO -#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO) -#endif -#ifndef _PAGE_KERNEL_ROX -#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC) -#endif -#ifndef _PAGE_KERNEL_RW -#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE) -#endif -#ifndef _PAGE_KERNEL_RWX -#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \ - _PAGE_HWWRITE | _PAGE_EXEC) -#endif -#ifndef _PAGE_HPTEFLAGS -#define _PAGE_HPTEFLAGS _PAGE_HASHPTE -#endif -#ifndef _PTE_NONE_MASK -#define _PTE_NONE_MASK _PAGE_HPTEFLAGS -#endif - -#ifndef __ASSEMBLY__ - -/* - * Don't just check for any non zero bits in __PAGE_USER, since for book3e - * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in - * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too. - */ -static inline bool pte_user(pte_t pte) -{ - return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER; -} -#endif /* __ASSEMBLY__ */ - -/* Location of the PFN in the PTE. Most 32-bit platforms use the same - * as _PAGE_SHIFT here (ie, naturally aligned). - * Platform who don't just pre-define the value so we don't override it here - */ -#ifndef PTE_RPN_SHIFT -#define PTE_RPN_SHIFT (PAGE_SHIFT) -#endif - -/* The mask covered by the RPN must be a ULL on 32-bit platforms with - * 64-bit PTEs - */ -#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) -#define PTE_RPN_MASK (~((1ULL<<PTE_RPN_SHIFT)-1)) -#else -#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1)) -#endif - -/* _PAGE_CHG_MASK masks of bits that are to be preserved across - * pgprot changes - */ -#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ - _PAGE_ACCESSED | _PAGE_SPECIAL) - -/* Mask of bits returned by pte_pgprot() */ -#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ - _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \ - _PAGE_PRIVILEGED | \ - _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) - -/* - * We define 2 sets of base prot bits, one for basic pages (ie, - * cacheable kernel and user pages) and one for non cacheable - * pages. We always set _PAGE_COHERENT when SMP is enabled or - * the processor might need it for DMA coherency. - */ -#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) -#if defined(CONFIG_SMP) || defined(CONFIG_PPC_STD_MMU) || \ - defined(CONFIG_PPC_E500MC) -#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) -#else -#define _PAGE_BASE (_PAGE_BASE_NC) -#endif - -/* Permission masks used to generate the __P and __S table, - * - * Note:__pgprot is defined in arch/powerpc/include/asm/page.h - * - * Write permissions imply read permissions for now (we could make write-only - * pages on BookE but we don't bother for now). Execute permission control is - * possible on platforms that define _PAGE_EXEC - * - * Note due to the way vm flags are laid out, the bits are XWR - */ -#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ - _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \ - _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \ - _PAGE_EXEC) - -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_X -#define __P101 PAGE_READONLY_X -#define __P110 PAGE_COPY_X -#define __P111 PAGE_COPY_X - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_X -#define __S101 PAGE_READONLY_X -#define __S110 PAGE_SHARED_X -#define __S111 PAGE_SHARED_X - -/* Permission masks used for kernel mappings */ -#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) -#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE) -#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE | _PAGE_GUARDED) -#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) -#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) - -/* Protection used for kernel text. We want the debuggers to be able to - * set breakpoints anywhere, so don't write protect the kernel text - * on platforms where such control is possible. - */ -#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\ - defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE) -#define PAGE_KERNEL_TEXT PAGE_KERNEL_X -#else -#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX -#endif - -/* Make modules code happy. We don't set RO yet */ -#define PAGE_KERNEL_EXEC PAGE_KERNEL_X - -/* Advertise special mapping type for AGP */ -#define PAGE_AGP (PAGE_KERNEL_NC) -#define HAVE_PAGE_AGP - -#ifndef _PAGE_READ -/* if not defined, we should not find _PAGE_WRITE too */ -#define _PAGE_READ 0 -#define _PAGE_WRITE _PAGE_RW -#endif - -#ifndef H_PAGE_4K_PFN -#define H_PAGE_4K_PFN 0 -#endif diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 5b480e1d5909..f73886a1a7f5 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -26,6 +26,37 @@ #include <uapi/asm/ptrace.h> #include <asm/asm-const.h> +#ifndef __ASSEMBLY__ +struct pt_regs +{ + union { + struct user_pt_regs user_regs; + struct { + unsigned long gpr[32]; + unsigned long nip; + unsigned long msr; + unsigned long orig_gpr3; + unsigned long ctr; + unsigned long link; + unsigned long xer; + unsigned long ccr; +#ifdef CONFIG_PPC64 + unsigned long softe; +#else + unsigned long mq; +#endif + unsigned long trap; + unsigned long dar; + unsigned long dsisr; + unsigned long result; + }; + }; + +#ifdef CONFIG_PPC64 + unsigned long ppr; +#endif +}; +#endif #ifdef __powerpc64__ @@ -102,6 +133,11 @@ static inline long regs_return_value(struct pt_regs *regs) return -regs->gpr[3]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c90698972f42..de52c3166ba4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -118,11 +118,16 @@ #define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */ #define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */ #define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */ -#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */ #define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */ #define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */ +#else +#define MSR_TM_ACTIVE(x) 0 +#endif + #if defined(CONFIG_PPC_BOOK3S_64) #define MSR_64BIT MSR_SF diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 71e393c46a49..bb38dd67d47d 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -125,6 +125,7 @@ struct rtas_suspend_me_data { #define RTAS_TYPE_INFO 0xE2 #define RTAS_TYPE_DEALLOC 0xE3 #define RTAS_TYPE_DUMP 0xE4 +#define RTAS_TYPE_HOTPLUG 0xE5 /* I don't add PowerMGM events right now, this is a different topic */ #define RTAS_TYPE_PMGM_POWER_SW_ON 0x60 #define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61 @@ -185,11 +186,23 @@ static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog) return (elog->byte1 & 0x18) >> 3; } +static inline +void rtas_set_disposition_recovered(struct rtas_error_log *elog) +{ + elog->byte1 &= ~0x18; + elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3); +} + static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog) { return (elog->byte1 & 0x04) >> 2; } +static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog) +{ + return (elog->byte2 & 0xf0) >> 4; +} + #define rtas_error_type(x) ((x)->byte3) static inline @@ -275,6 +288,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) #define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H') #define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D') #define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P') +#define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C') /* Vendor specific Platform Event Log Format, Version 6, section header */ struct pseries_errorlog { @@ -316,6 +330,7 @@ struct pseries_hp_errorlog { #define PSERIES_HP_ELOG_RESOURCE_MEM 2 #define PSERIES_HP_ELOG_RESOURCE_SLOT 3 #define PSERIES_HP_ELOG_RESOURCE_PHB 4 +#define PSERIES_HP_ELOG_RESOURCE_PMEM 6 #define PSERIES_HP_ELOG_ACTION_ADD 1 #define PSERIES_HP_ELOG_ACTION_REMOVE 2 diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index e40406cf5628..a595461c9cb0 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -32,6 +32,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); void slice_init_new_context_exec(struct mm_struct *mm); +void slice_setup_new_exec(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 95b66a0c639b..41695745032c 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -100,6 +100,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map); static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -116,6 +117,11 @@ static inline struct cpumask *cpu_l2_cache_mask(int cpu) return per_cpu(cpu_l2_cache_map, cpu); } +static inline struct cpumask *cpu_smallcore_mask(int cpu) +{ + return per_cpu(cpu_smallcore_map, cpu); +} + extern int cpu_to_core_id(int cpu); /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers. @@ -166,6 +172,11 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu) return cpumask_of(cpu); } +static inline const struct cpumask *cpu_smallcore_mask(int cpu) +{ + return cpumask_of(cpu); +} + #endif /* CONFIG_SMP */ #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 28f5dae25db6..68da49320592 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -9,17 +9,6 @@ * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space */ #define SECTION_SIZE_BITS 24 -/* - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and - * page_to_nid does a page->section->node lookup - * Hence only increase for VMEMMAP. - */ -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#define MAX_PHYSMEM_BITS 47 -#else -#define MAX_PHYSMEM_BITS 46 -#endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h new file mode 100644 index 000000000000..1c8460e23583 --- /dev/null +++ b/arch/powerpc/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GCC stack protector support. + * + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H + +#include <linux/random.h> +#include <linux/version.h> +#include <asm/reg.h> +#include <asm/current.h> +#include <asm/paca.h> + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + canary = get_random_canary(); + canary ^= mftb(); + canary ^= LINUX_VERSION_CODE; + canary &= CANARY_MASK; + + current->stack_canary = canary; +#ifdef CONFIG_PPC64 + get_paca()->canary = canary; +#endif +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 3c0002044bc9..544cac0474cb 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -29,6 +29,7 @@ #include <asm/page.h> #include <asm/accounting.h> +#define SLB_PRELOAD_NR 16U /* * low level task data. */ @@ -44,6 +45,10 @@ struct thread_info { #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) struct cpu_accounting_data accounting; #endif + unsigned char slb_preload_nr; + unsigned char slb_preload_tail; + u32 slb_preload_esid[SLB_PRELOAD_NR]; + /* low level flags - has atomic operations done on it */ unsigned long flags ____cacheline_aligned_in_smp; }; @@ -72,6 +77,12 @@ static inline struct thread_info *current_thread_info(void) } extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); + +#ifdef CONFIG_PPC_BOOK3S_64 +void arch_setup_new_exec(void); +#define arch_setup_new_exec arch_setup_new_exec +#endif + #endif /* __ASSEMBLY__ */ /* @@ -81,7 +92,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_FSCHECK 3 /* Check FS is USER_DS on return */ -#define TIF_32BIT 4 /* 32 bit binary */ +#define TIF_SYSCALL_EMU 4 /* syscall emulation active */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ @@ -100,6 +111,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src #define TIF_ELF2ABI 18 /* function descriptors must die! */ #endif #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +#define TIF_32BIT 20 /* 32 bit binary */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -120,9 +132,10 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) #define _TIF_FSCHECK (1<<TIF_FSCHECK) +#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ - _TIF_NOHZ) + _TIF_NOHZ | _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index d018e8602694..58ef8c43a89d 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -201,6 +201,21 @@ TRACE_EVENT(tlbie, __entry->r) ); +TRACE_EVENT(tlbia, + + TP_PROTO(unsigned long id), + TP_ARGS(id), + TP_STRUCT__entry( + __field(unsigned long, id) + ), + + TP_fast_assign( + __entry->id = id; + ), + + TP_printk("ctx.id=0x%lx", __entry->id) +); + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index bac225bb7f64..15bea9a0f260 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -260,7 +260,7 @@ do { \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -274,7 +274,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ barrier_nospec(); \ @@ -288,7 +288,7 @@ do { \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ diff --git a/arch/powerpc/include/asm/user.h b/arch/powerpc/include/asm/user.h index 5c0e082eae7b..99443b8594e7 100644 --- a/arch/powerpc/include/asm/user.h +++ b/arch/powerpc/include/asm/user.h @@ -31,7 +31,7 @@ * to write an integer number of pages. */ struct user { - struct pt_regs regs; /* entire machine state */ + struct user_pt_regs regs; /* entire machine state */ size_t u_tsize; /* text size (pages) */ size_t u_dsize; /* data size (pages) */ size_t u_ssize; /* stack size (pages) */ diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h index 5e3edc2a7634..f5f1ccc740fc 100644 --- a/arch/powerpc/include/uapi/asm/ptrace.h +++ b/arch/powerpc/include/uapi/asm/ptrace.h @@ -29,7 +29,12 @@ #ifndef __ASSEMBLY__ -struct pt_regs { +#ifdef __KERNEL__ +struct user_pt_regs +#else +struct pt_regs +#endif +{ unsigned long gpr[32]; unsigned long nip; unsigned long msr; @@ -160,6 +165,10 @@ struct pt_regs { #define PTRACE_GETVSRREGS 0x1b #define PTRACE_SETVSRREGS 0x1c +/* Syscall emulation defines */ +#define PTRACE_SYSEMU 0x1d +#define PTRACE_SYSEMU_SINGLESTEP 0x1e + /* * Get or set a debug register. The first 16 are DABR registers and the * second 16 are IABR registers. diff --git a/arch/powerpc/include/uapi/asm/sigcontext.h b/arch/powerpc/include/uapi/asm/sigcontext.h index 2fbe485acdb4..630aeda56d59 100644 --- a/arch/powerpc/include/uapi/asm/sigcontext.h +++ b/arch/powerpc/include/uapi/asm/sigcontext.h @@ -22,7 +22,11 @@ struct sigcontext { #endif unsigned long handler; unsigned long oldmask; - struct pt_regs __user *regs; +#ifdef __KERNEL__ + struct user_pt_regs __user *regs; +#else + struct pt_regs *regs; +#endif #ifdef __powerpc64__ elf_gregset_t gp_regs; elf_fpregset_t fp_regs; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3b66f2c19c84..53d4b8d5b54d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,7 +5,8 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror +# Disable clang warning for using setjmp without setjmp.h header +CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) @@ -20,12 +21,14 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE) endif obj-y := cputable.o ptrace.o syscalls.o \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d68b9ef38328..9ffc72ded73a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -79,11 +79,16 @@ int main(void) { OFFSET(THREAD, task_struct, thread); OFFSET(MM, task_struct, mm); +#ifdef CONFIG_STACKPROTECTOR + OFFSET(TASK_CANARY, task_struct, stack_canary); +#ifdef CONFIG_PPC64 + OFFSET(PACA_CANARY, paca_struct, canary); +#endif +#endif OFFSET(MMCONTEXTID, mm_struct, context.id); #ifdef CONFIG_PPC64 DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); - OFFSET(TASKTHREADPPR, task_struct, thread.ppr); #else OFFSET(THREAD_INFO, task_struct, stack); DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16)); @@ -173,7 +178,6 @@ int main(void) OFFSET(PACAKSAVE, paca_struct, kstack); OFFSET(PACACURRENT, paca_struct, __current); OFFSET(PACASAVEDMSR, paca_struct, saved_msr); - OFFSET(PACASTABRR, paca_struct, stab_rr); OFFSET(PACAR1, paca_struct, saved_r1); OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); @@ -212,6 +216,7 @@ int main(void) #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACASLBCACHE, paca_struct, slb_cache); OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); + OFFSET(PACASTABRR, paca_struct, stab_rr); OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); #ifdef CONFIG_PPC_MM_SLICES OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); @@ -274,11 +279,6 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); -#ifdef CONFIG_PPC64 - /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ - DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); - DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); -#endif /* CONFIG_PPC64 */ STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); @@ -322,10 +322,7 @@ int main(void) STACK_PT_REGS_OFFSET(_ESR, dsisr); #else /* CONFIG_PPC64 */ STACK_PT_REGS_OFFSET(SOFTE, softe); - - /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ - DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); - DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); + STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_PPC32) diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index b2072d5bbf2b..b4241ed1456e 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -163,7 +163,7 @@ void btext_map(void) offset = ((unsigned long) dispDeviceBase) - base; size = dispDeviceRowBytes * dispDeviceRect[3] + offset + dispDeviceRect[0]; - vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0)))); + vbase = ioremap_wc(base, size); if (!vbase) return; logicalDisplayBase = vbase + offset; diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a8f20e5928e1..be57bd07596d 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -20,6 +20,8 @@ #include <linux/percpu.h> #include <linux/slab.h> #include <asm/prom.h> +#include <asm/cputhreads.h> +#include <asm/smp.h> #include "cacheinfo.h" @@ -627,17 +629,48 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char * static struct kobj_attribute cache_level_attr = __ATTR(level, 0444, level_show, NULL); +static unsigned int index_dir_to_cpu(struct cache_index_dir *index) +{ + struct kobject *index_dir_kobj = &index->kobj; + struct kobject *cache_dir_kobj = index_dir_kobj->parent; + struct kobject *cpu_dev_kobj = cache_dir_kobj->parent; + struct device *dev = kobj_to_dev(cpu_dev_kobj); + + return dev->id; +} + +/* + * On big-core systems, each core has two groups of CPUs each of which + * has its own L1-cache. The thread-siblings which share l1-cache with + * @cpu can be obtained via cpu_smallcore_mask(). + */ +static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache) +{ + if (cache->level == 1) + return cpu_smallcore_mask(cpu); + + return &cache->shared_cpu_map; +} + static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf) { struct cache_index_dir *index; struct cache *cache; - int ret; + const struct cpumask *mask; + int ret, cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; + if (has_big_cores) { + cpu = index_dir_to_cpu(index); + mask = get_big_core_shared_cpu_map(cpu, cache); + } else { + mask = &cache->shared_cpu_map; + } + ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", - cpumask_pr_args(&cache->shared_cpu_map)); + cpumask_pr_args(mask)); buf[ret++] = '\n'; buf[ret] = '\0'; return ret; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index d10ad258d41a..bbdc4706c159 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -110,7 +110,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, vaddr = __va(paddr); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); } else { - vaddr = __ioremap(paddr, PAGE_SIZE, 0); + vaddr = ioremap_cache(paddr, PAGE_SIZE); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); iounmap(vaddr); } diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 88f3963ca30f..5fc335f4d9cd 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -11,7 +11,7 @@ * */ -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/memblock.h> #include <linux/pfn.h> #include <linux/of_platform.h> @@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = swiotlb_dma_mapping_error, + .mapping_error = dma_direct_mapping_error, .get_required_mask = swiotlb_powerpc_get_required, }; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 6ebba3e48b01..6cae6b56ffd6 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -169,6 +169,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; + if (!pdn) { + pr_warn("EEH: Note: No error log for absent device.\n"); + return 0; + } + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", pdn->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); @@ -399,7 +404,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) } /* Isolate the PHB and send event */ - eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(phb_pe); eeh_serialize_unlock(flags); pr_err("EEH: PHB#%x failure detected, location: %s\n", @@ -558,7 +563,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * with other functions on this device, and functions under * bridges. */ - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); eeh_serialize_unlock(flags); /* Most EEH events are due to device driver bugs. Having @@ -676,7 +681,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) /* Check if the request is finished successfully */ if (active_flag) { - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (rc < 0) return rc; @@ -825,7 +830,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; case pcie_hot_reset: - eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); if (!(pe->type & EEH_PE_VF)) @@ -833,7 +839,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); if (!(pe->type & EEH_PE_VF)) @@ -913,16 +920,15 @@ int eeh_pe_reset_full(struct eeh_pe *pe) break; /* Wait until the PE is in a functioning state */ - state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (eeh_state_active(state)) - break; - + state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (state < 0) { pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", __func__, pe->phb->global_number, pe->addr); ret = -ENOTRECOVERABLE; break; } + if (eeh_state_active(state)) + break; /* Set error in case this is our last attempt */ ret = -EIO; @@ -1036,6 +1042,11 @@ void eeh_probe_devices(void) pdn = hose->pci_data; traverse_pci_dn(pdn, eeh_ops->probe, NULL); } + if (eeh_enabled()) + pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); + else + pr_info("EEH: No capable adapters found\n"); + } /** @@ -1079,18 +1090,7 @@ static int eeh_init(void) eeh_dev_phb_init_dynamic(hose); /* Initialize EEH event */ - ret = eeh_event_init(); - if (ret) - return ret; - - eeh_probe_devices(); - - if (eeh_enabled()) - pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else if (!eeh_has_flag(EEH_POSTPONED_PROBE)) - pr_info("EEH: No capable adapters found\n"); - - return ret; + return eeh_event_init(); } core_initcall_sync(eeh_init); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index a34e6912c15e..d8c90f3284b5 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -60,8 +60,6 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) /* Associate EEH device with OF node */ pdn->edev = edev; edev->pdn = pdn; - INIT_LIST_HEAD(&edev->list); - INIT_LIST_HEAD(&edev->rmv_list); return edev; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 67619b4b3f96..9446248eb6b8 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -35,8 +35,8 @@ #include <asm/rtas.h> struct eeh_rmv_data { - struct list_head edev_list; - int removed; + struct list_head removed_vf_list; + int removed_dev_count; }; static int eeh_result_priority(enum pci_ers_result result) @@ -281,6 +281,10 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, struct pci_driver *driver; enum pci_ers_result new_result; + if (!edev->pdev) { + eeh_edev_info(edev, "no device"); + return; + } device_lock(&edev->pdev->dev); if (eeh_edev_actionable(edev)) { driver = eeh_pcid_get(edev->pdev); @@ -400,7 +404,7 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) * EEH device is created. */ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) { - if (list_is_last(&edev->list, &edev->pe->edevs)) + if (list_is_last(&edev->entry, &edev->pe->edevs)) eeh_pe_restore_bars(edev->pe); return NULL; @@ -465,10 +469,9 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev, return rc; } -static void *eeh_add_virt_device(void *data, void *userdata) +static void *eeh_add_virt_device(struct eeh_dev *edev) { struct pci_driver *driver; - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_dn *pdn = eeh_dev_to_pdn(edev); @@ -499,7 +502,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) struct pci_driver *driver; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; - int *removed = rmv_data ? &rmv_data->removed : NULL; /* * Actually, we should remove the PCI bridges as well. @@ -521,7 +523,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) if (eeh_dev_removed(edev)) return NULL; - if (removed) { + if (rmv_data) { if (eeh_pe_passed(edev->pe)) return NULL; driver = eeh_pcid_get(dev); @@ -539,10 +541,9 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) /* Remove it from PCI subsystem */ pr_debug("EEH: Removing %s without EEH sensitive driver\n", pci_name(dev)); - edev->bus = dev->bus; edev->mode |= EEH_DEV_DISCONNECTED; - if (removed) - (*removed)++; + if (rmv_data) + rmv_data->removed_dev_count++; if (edev->physfn) { #ifdef CONFIG_PCI_IOV @@ -558,7 +559,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) - list_add(&edev->rmv_list, &rmv_data->edev_list); + list_add(&edev->rmv_entry, &rmv_data->removed_vf_list); } else { pci_lock_rescan_remove(); pci_stop_and_remove_bus_device(dev); @@ -727,7 +728,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * the device up before the scripts have taken it down, * potentially weird things happen. */ - if (!driver_eeh_aware || rmv_data->removed) { + if (!driver_eeh_aware || rmv_data->removed_dev_count) { pr_info("EEH: Sleep 5s ahead of %s hotplug\n", (driver_eeh_aware ? "partial" : "complete")); ssleep(5); @@ -737,10 +738,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * PE. We should disconnect it so the binding can be * rebuilt when adding PCI devices. */ - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); + edev = list_first_entry(&pe->edevs, struct eeh_dev, entry); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); if (pe->type & EEH_PE_VF) { - eeh_add_virt_device(edev, NULL); + eeh_add_virt_device(edev); } else { if (!driver_eeh_aware) eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); @@ -789,7 +790,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) struct eeh_pe *tmp_pe; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; + struct eeh_rmv_data rmv_data = + {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0}; bus = eeh_pe_bus_get(pe); if (!bus) { @@ -806,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pe->phb->global_number, pe->addr, pe->freeze_count); - goto hard_fail; + result = PCI_ERS_RESULT_DISCONNECT; } - pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", - pe->freeze_count, eeh_max_freezes); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -821,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * the error. Override the result if necessary to have partially * hotplug for this case. */ - pr_info("EEH: Notify device drivers to shutdown\n"); - eeh_set_channel_state(pe, pci_channel_io_frozen); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error, - &result); - if ((pe->type & EEH_PE_PHB) && - result != PCI_ERS_RESULT_NONE && - result != PCI_ERS_RESULT_NEED_RESET) - result = PCI_ERS_RESULT_NEED_RESET; + if (result != PCI_ERS_RESULT_DISCONNECT) { + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", + pe->freeze_count, eeh_max_freezes); + pr_info("EEH: Notify device drivers to shutdown\n"); + eeh_set_channel_state(pe, pci_channel_io_frozen); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(IO frozen)", pe, + eeh_report_error, &result); + if ((pe->type & EEH_PE_PHB) && + result != PCI_ERS_RESULT_NONE && + result != PCI_ERS_RESULT_NEED_RESET) + result = PCI_ERS_RESULT_NEED_RESET; + } /* Get the current PCI slot state. This can take a long time, * sometimes over 300 seconds for certain systems. */ - rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); - if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warn("EEH: Permanent failure\n"); - goto hard_fail; + if (result != PCI_ERS_RESULT_DISCONNECT) { + rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { + pr_warn("EEH: Permanent failure\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } } /* Since rtas may enable MMIO when posting the error log, * don't post the error log until after all dev drivers * have been informed. */ - pr_info("EEH: Collect temporary log\n"); - eeh_slot_error_detail(pe, EEH_LOG_TEMP); + if (result != PCI_ERS_RESULT_DISCONNECT) { + pr_info("EEH: Collect temporary log\n"); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); + } /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they @@ -857,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); - goto hard_fail; + result = PCI_ERS_RESULT_DISCONNECT; } } @@ -866,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Enable I/O for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - if (rc < 0) - goto hard_fail; - if (rc) { + if (rc < 0) { + result = PCI_ERS_RESULT_DISCONNECT; + } else if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); @@ -882,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Enabled DMA for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - if (rc < 0) - goto hard_fail; - if (rc) { + if (rc < 0) { + result = PCI_ERS_RESULT_DISCONNECT; + } else if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { /* @@ -897,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } } - /* If any device has a hard failure, then shut off everything. */ - if (result == PCI_ERS_RESULT_DISCONNECT) { - pr_warn("EEH: Device driver gave up\n"); - goto hard_fail; - } - /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); @@ -910,88 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe) if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); - goto hard_fail; + result = PCI_ERS_RESULT_DISCONNECT; + } else { + result = PCI_ERS_RESULT_NONE; + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("slot_reset", pe, eeh_report_reset, + &result); } - - pr_info("EEH: Notify device drivers " - "the completion of reset\n"); - result = PCI_ERS_RESULT_NONE; - eeh_set_channel_state(pe, pci_channel_io_normal); - eeh_set_irq_state(pe, true); - eeh_pe_report("slot_reset", pe, eeh_report_reset, &result); - } - - /* All devices should claim they have recovered by now. */ - if ((result != PCI_ERS_RESULT_RECOVERED) && - (result != PCI_ERS_RESULT_NONE)) { - pr_warn("EEH: Not recovered\n"); - goto hard_fail; - } - - /* - * For those hot removed VFs, we should add back them after PF get - * recovered properly. - */ - list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { - eeh_add_virt_device(edev, NULL); - list_del(&edev->rmv_list); } - /* Tell all device drivers that they can resume operations */ - pr_info("EEH: Notify device driver to resume\n"); - eeh_set_channel_state(pe, pci_channel_io_normal); - eeh_set_irq_state(pe, true); - eeh_pe_report("resume", pe, eeh_report_resume, NULL); - eeh_for_each_pe(pe, tmp_pe) { - eeh_pe_for_each_dev(tmp_pe, edev, tmp) { - edev->mode &= ~EEH_DEV_NO_HANDLER; - edev->in_error = false; + if ((result == PCI_ERS_RESULT_RECOVERED) || + (result == PCI_ERS_RESULT_NONE)) { + /* + * For those hot removed VFs, we should add back them after PF + * get recovered properly. + */ + list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list, + rmv_entry) { + eeh_add_virt_device(edev); + list_del(&edev->rmv_entry); } - } - pr_info("EEH: Recovery successful.\n"); - goto final; + /* Tell all device drivers that they can resume operations */ + pr_info("EEH: Notify device driver to resume\n"); + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("resume", pe, eeh_report_resume, NULL); + eeh_for_each_pe(pe, tmp_pe) { + eeh_pe_for_each_dev(tmp_pe, edev, tmp) { + edev->mode &= ~EEH_DEV_NO_HANDLER; + edev->in_error = false; + } + } -hard_fail: - /* - * About 90% of all real-life EEH failures in the field - * are due to poorly seated PCI cards. Only 10% or so are - * due to actual, failed cards. - */ - pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" - "Please try reseating or replacing it\n", - pe->phb->global_number, pe->addr); + pr_info("EEH: Recovery successful.\n"); + } else { + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); - eeh_slot_error_detail(pe, EEH_LOG_PERM); + eeh_slot_error_detail(pe, EEH_LOG_PERM); - /* Notify all devices that they're about to go down. */ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(permanent failure)", pe, - eeh_report_failure, NULL); + /* Notify all devices that they're about to go down. */ + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(permanent failure)", pe, + eeh_report_failure, NULL); - /* Mark the PE to be removed permanently */ - eeh_pe_state_mark(pe, EEH_PE_REMOVED); + /* Mark the PE to be removed permanently */ + eeh_pe_state_mark(pe, EEH_PE_REMOVED); - /* - * Shut down the device drivers for good. We mark - * all removed devices correctly to avoid access - * the their PCI config any more. - */ - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - /* The passed PE should no longer be used */ - return; + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; + } } -final: eeh_pe_state_clear(pe, EEH_PE_RECOVERING); } @@ -1026,7 +1021,7 @@ void eeh_handle_special_event(void) phb_pe = eeh_phb_pe_get(hose); if (!phb_pe) continue; - eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(phb_pe); } eeh_serialize_unlock(flags); @@ -1041,11 +1036,9 @@ void eeh_handle_special_event(void) /* Purge all events of the PHB */ eeh_remove_event(pe, true); - if (rc == EEH_NEXT_ERR_DEAD_PHB) - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - else - eeh_pe_state_mark(pe, - EEH_PE_ISOLATED | EEH_PE_RECOVERING); + if (rc != EEH_NEXT_ERR_DEAD_PHB) + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + eeh_pe_mark_isolated(pe); eeh_serialize_unlock(flags); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 1b238ecc553e..6fa2032e0594 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -75,7 +75,6 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) pe->type = type; pe->phb = phb; INIT_LIST_HEAD(&pe->child_list); - INIT_LIST_HEAD(&pe->child); INIT_LIST_HEAD(&pe->edevs); pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe), @@ -110,6 +109,57 @@ int eeh_phb_pe_create(struct pci_controller *phb) } /** + * eeh_wait_state - Wait for PE state + * @pe: EEH PE + * @max_wait: maximal period in millisecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +int eeh_wait_state(struct eeh_pe *pe, int max_wait) +{ + int ret; + int mwait; + + /* + * According to PAPR, the state of PE might be temporarily + * unavailable. Under the circumstance, we have to wait + * for indicated time determined by firmware. The maximal + * wait time is 5 minutes, which is acquired from the original + * EEH implementation. Also, the original implementation + * also defined the minimal wait time as 1 second. + */ +#define EEH_STATE_MIN_WAIT_TIME (1000) +#define EEH_STATE_MAX_WAIT_TIME (300 * 1000) + + while (1) { + ret = eeh_ops->get_state(pe, &mwait); + + if (ret != EEH_STATE_UNAVAILABLE) + return ret; + + if (max_wait <= 0) { + pr_warn("%s: Timeout when getting PE's state (%d)\n", + __func__, max_wait); + return EEH_STATE_NOT_SUPPORT; + } + + if (mwait < EEH_STATE_MIN_WAIT_TIME) { + pr_warn("%s: Firmware returned bad wait value %d\n", + __func__, mwait); + mwait = EEH_STATE_MIN_WAIT_TIME; + } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { + pr_warn("%s: Firmware returned too long wait value %d\n", + __func__, mwait); + mwait = EEH_STATE_MAX_WAIT_TIME; + } + + msleep(min(mwait, max_wait)); + max_wait -= mwait; + } +} + +/** * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB * @phb: PCI controller * @@ -360,7 +410,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) edev->pe = pe; /* Put the edev to PE */ - list_add_tail(&edev->list, &pe->edevs); + list_add_tail(&edev->entry, &pe->edevs); pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n", pdn->phb->global_number, pdn->busno, @@ -369,7 +419,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pe->addr); return 0; } else if (pe && (pe->type & EEH_PE_INVALID)) { - list_add_tail(&edev->list, &pe->edevs); + list_add_tail(&edev->entry, &pe->edevs); edev->pe = pe; /* * We're running to here because of PCI hotplug caused by @@ -379,7 +429,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) while (parent) { if (!(parent->type & EEH_PE_INVALID)) break; - parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); + parent->type &= ~EEH_PE_INVALID; parent = parent->parent; } @@ -429,7 +479,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * link the EEH device accordingly. */ list_add_tail(&pe->child, &parent->child_list); - list_add_tail(&edev->list, &pe->edevs); + list_add_tail(&edev->entry, &pe->edevs); edev->pe = pe; pr_debug("EEH: Add %04x:%02x:%02x.%01x to " "Device PE#%x, Parent PE#%x\n", @@ -457,7 +507,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) int cnt; struct pci_dn *pdn = eeh_dev_to_pdn(edev); - if (!edev->pe) { + pe = eeh_dev_to_pe(edev); + if (!pe) { pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n", __func__, pdn->phb->global_number, pdn->busno, @@ -467,9 +518,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) } /* Remove the EEH device */ - pe = eeh_dev_to_pe(edev); edev->pe = NULL; - list_del(&edev->list); + list_del(&edev->entry); /* * Check if the parent PE includes any EEH devices. @@ -541,56 +591,50 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe) } /** - * __eeh_pe_state_mark - Mark the state for the PE - * @data: EEH PE - * @flag: state + * eeh_pe_state_mark - Mark specified state for PE and its associated device + * @pe: EEH PE * - * The function is used to mark the indicated state for the given - * PE. Also, the associated PCI devices will be put into IO frozen - * state as well. + * EEH error affects the current PE and its child PEs. The function + * is used to mark appropriate state for the affected PEs and the + * associated devices. */ -static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag) +void eeh_pe_state_mark(struct eeh_pe *root, int state) { - int state = *((int *)flag); - struct eeh_dev *edev, *tmp; - struct pci_dev *pdev; - - /* Keep the state of permanently removed PE intact */ - if (pe->state & EEH_PE_REMOVED) - return NULL; - - pe->state |= state; - - /* Offline PCI devices if applicable */ - if (!(state & EEH_PE_ISOLATED)) - return NULL; - - eeh_pe_for_each_dev(pe, edev, tmp) { - pdev = eeh_dev_to_pci_dev(edev); - if (pdev) - pdev->error_state = pci_channel_io_frozen; - } - - /* Block PCI config access if required */ - if (pe->state & EEH_PE_CFG_RESTRICTED) - pe->state |= EEH_PE_CFG_BLOCKED; + struct eeh_pe *pe; - return NULL; + eeh_for_each_pe(root, pe) + if (!(pe->state & EEH_PE_REMOVED)) + pe->state |= state; } +EXPORT_SYMBOL_GPL(eeh_pe_state_mark); /** - * eeh_pe_state_mark - Mark specified state for PE and its associated device + * eeh_pe_mark_isolated * @pe: EEH PE * - * EEH error affects the current PE and its child PEs. The function - * is used to mark appropriate state for the affected PEs and the - * associated devices. + * Record that a PE has been isolated by marking the PE and it's children as + * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices + * as pci_channel_io_frozen. */ -void eeh_pe_state_mark(struct eeh_pe *pe, int state) +void eeh_pe_mark_isolated(struct eeh_pe *root) { - eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); + struct eeh_pe *pe; + struct eeh_dev *edev; + struct pci_dev *pdev; + + eeh_pe_state_mark(root, EEH_PE_ISOLATED); + eeh_for_each_pe(root, pe) { + list_for_each_entry(edev, &pe->edevs, entry) { + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + pdev->error_state = pci_channel_io_frozen; + } + /* Block PCI config access if required */ + if (pe->state & EEH_PE_CFG_RESTRICTED) + pe->state |= EEH_PE_CFG_BLOCKED; + } } -EXPORT_SYMBOL_GPL(eeh_pe_state_mark); +EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated); static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag) { @@ -671,28 +715,6 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); } -/** - * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space - * @pe: PE - * @state: PE state to be set - * - * Set specified flag to PE and its child PEs. The PCI config space - * of some PEs is blocked automatically when EEH_PE_ISOLATED is set, - * which isn't needed in some situations. The function allows to set - * the specified flag to indicated PEs without blocking their PCI - * config space. - */ -void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state) -{ - eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); - if (!(state & EEH_PE_ISOLATED)) - return; - - /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */ - state = EEH_PE_CFG_BLOCKED; - eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); -} - /* * Some PCI bridges (e.g. PLX bridges) have primary/secondary * buses assigned explicitly by firmware, and we probably have @@ -945,7 +967,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) return pe->bus; /* Retrieve the parent PCI bus of first (top) PCI device */ - edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); pdev = eeh_dev_to_pci_dev(edev); if (pdev) return pdev->bus; diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e58c3f467db5..77decded1175 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -794,7 +794,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601) lis r10,MSR_KERNEL@h ori r10,r10,MSR_KERNEL@l bl transfer_to_handler_full - .long nonrecoverable_exception + .long unrecoverable_exception .long ret_from_except #endif @@ -1297,7 +1297,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601) rlwinm r3,r3,0,0,30 stw r3,_TRAP(r1) 4: addi r3,r1,STACK_FRAME_OVERHEAD - bl nonrecoverable_exception + bl unrecoverable_exception /* shouldn't return */ b 4b diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2206912ea4f0..7b1693adff2a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -171,7 +171,7 @@ system_call: /* label this so stack traces look sane */ * based on caller's run-mode / personality. */ ld r11,SYS_CALL_TABLE@toc(2) - andi. r10,r10,_TIF_32BIT + andis. r10,r10,_TIF_32BIT@h beq 15f addi r11,r11,8 /* use 32-bit syscall entries */ clrldi r3,r3,32 @@ -386,10 +386,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 4: /* Anything else left to do? */ BEGIN_FTR_SECTION - lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */ - ld r10,PACACURRENT(r13) + lis r3,DEFAULT_PPR@highest /* Set default PPR */ sldi r3,r3,32 /* bits 11-13 are used for ppr */ - std r3,TASKTHREADPPR(r10) + std r3,_PPR(r1) END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) @@ -624,6 +623,10 @@ _GLOBAL(_switch) addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ +#if defined(CONFIG_STACKPROTECTOR) + ld r6, TASK_CANARY(r6) + std r6, PACA_CANARY(r13) +#endif ld r8,KSP(r4) /* new stack pointer */ #ifdef CONFIG_PPC_BOOK3S_64 @@ -672,7 +675,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) isync slbie r6 +BEGIN_FTR_SECTION slbie r6 /* Workaround POWER5 < DD2.1 issue */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) slbmte r7,r0 isync 2: @@ -936,12 +941,6 @@ fast_exception_return: andi. r0,r3,MSR_RI beq- .Lunrecov_restore - /* Load PPR from thread struct before we clear MSR:RI */ -BEGIN_FTR_SECTION - ld r2,PACACURRENT(r13) - ld r2,TASKTHREADPPR(r2) -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - /* * Clear RI before restoring r13. If we are returning to * userspace and we take an exception after restoring r13, @@ -962,7 +961,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) andi. r0,r3,MSR_PR beq 1f BEGIN_FTR_SECTION - mtspr SPRN_PPR,r2 /* Restore PPR */ + /* Restore PPR */ + ld r2,_PPR(r1) + mtspr SPRN_PPR,r2 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) @@ -1118,7 +1119,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return); _GLOBAL(enter_rtas) mflr r0 std r0,16(r1) - stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ + stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */ /* Because RTAS is running in 32b mode, it clobbers the high order half * of all registers that it saves. We therefore save those registers @@ -1250,7 +1251,7 @@ rtas_restore_regs: ld r8,_DSISR(r1) mtdsisr r8 - addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */ + addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */ ld r0,16(r1) /* get return address */ mtlr r0 @@ -1261,7 +1262,7 @@ rtas_restore_regs: _GLOBAL(enter_prom) mflr r0 std r0,16(r1) - stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ + stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */ /* Because PROM is running in 32b mode, it clobbers the high order half * of all registers that it saves. We therefore save those registers @@ -1318,8 +1319,8 @@ _GLOBAL(enter_prom) REST_10GPRS(22, r1) ld r4,_CCR(r1) mtcr r4 - - addi r1,r1,PROM_FRAME_SIZE + + addi r1,r1,SWITCH_FRAME_SIZE ld r0,16(r1) mtlr r0 blr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2d8fc8c9da7a..89d32bb79d5e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -244,14 +244,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100) SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION - b machine_check_powernv_early + b machine_check_common_early FTR_SECTION_ELSE b machine_check_pSeries_0 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) -TRAMP_REAL_BEGIN(machine_check_powernv_early) -BEGIN_FTR_SECTION +TRAMP_REAL_BEGIN(machine_check_common_early) EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) /* * Register contents: @@ -305,7 +304,9 @@ BEGIN_FTR_SECTION /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ +BEGIN_FTR_SECTION ori r11,r11,MSR_ME /* turn on ME bit */ +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ori r11,r11,MSR_RI /* turn on RI bit */ LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 @@ -324,13 +325,15 @@ BEGIN_FTR_SECTION andc r11,r11,r10 /* Turn off MSR_ME */ b 1b b . /* prevent speculative execution */ -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) TRAMP_REAL_BEGIN(machine_check_pSeries) .globl machine_check_fwnmi machine_check_fwnmi: SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) +BEGIN_FTR_SECTION + b machine_check_common_early +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) machine_check_pSeries_0: EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) /* @@ -440,6 +443,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early) bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) +BEGIN_FTR_SECTION + b 4f +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) #ifdef CONFIG_PPC_P7_NAP /* @@ -463,11 +469,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ beq 5f - andi. r11,r12,MSR_PR /* See if coming from user. */ +4: andi. r11,r12,MSR_PR /* See if coming from user. */ bne 9f /* continue in V mode if we are. */ 5: #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +BEGIN_FTR_SECTION /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through @@ -476,6 +483,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) lbz r11,HSTATE_IN_GUEST(r13) cmpwi r11,0 /* Check if coming from guest */ bne 9f /* continue if we are. */ +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif /* * At this point we are not sure about what context we come from. @@ -510,6 +518,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) cmpdi r3,0 /* see if we handled MCE successfully */ beq 1b /* if !handled then panic */ +BEGIN_FTR_SECTION /* * Return from MC interrupt. * Queue up the MCE event so that we can log it later, while @@ -518,10 +527,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early) bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP RFI_TO_USER_OR_KERNEL +FTR_SECTION_ELSE + /* + * pSeries: Return from MC interrupt. Before that stay on emergency + * stack and call machine_check_exception to log the MCE event. + */ + LOAD_HANDLER(r10,mce_return) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + b . +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP - b machine_check_pSeries + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0(PACA_EXMC) + b machine_check_pSeries_0 EXC_COMMON_BEGIN(unrecover_mce) /* Invoke machine_check_exception to print MCE event and panic. */ @@ -535,6 +558,13 @@ EXC_COMMON_BEGIN(unrecover_mce) bl unrecoverable_exception b 1b +EXC_COMMON_BEGIN(mce_return) + /* Invoke machine_check_exception to print MCE event and return. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + MACHINE_CHECK_HANDLER_WINDUP + RFI_TO_KERNEL + b . EXC_REAL(data_access, 0x300, 0x80) EXC_VIRT(data_access, 0x4300, 0x80, 0x300) @@ -566,28 +596,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) - mr r12,r3 /* save r3 */ - mfspr r3,SPRN_DAR - mfspr r11,SPRN_SRR1 - crset 4*cr6+eq - BRANCH_TO_COMMON(r10, slb_miss_common) +EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380); EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) - mr r12,r3 /* save r3 */ - mfspr r3,SPRN_DAR - mfspr r11,SPRN_SRR1 - crset 4*cr6+eq - BRANCH_TO_COMMON(r10, slb_miss_common) +EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380); EXC_VIRT_END(data_access_slb, 0x4380, 0x80) + TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) +EXC_COMMON_BEGIN(data_access_slb_common) + mfspr r10,SPRN_DAR + std r10,PACA_EXSLB+EX_DAR(r13) + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) + ld r4,PACA_EXSLB+EX_DAR(r13) + std r4,_DAR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_slb_fault + cmpdi r3,0 + bne- 1f + b fast_exception_return +1: /* Error case */ + std r3,RESULT(r1) + bl save_nvgprs + RECONCILE_IRQ_STATE(r10, r11) + ld r4,_DAR(r1) + ld r5,RESULT(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_bad_slb_fault + b ret_from_except + EXC_REAL(instruction_access, 0x400, 0x80) EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) @@ -610,160 +648,34 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) - mr r12,r3 /* save r3 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - mfspr r11,SPRN_SRR1 - crclr 4*cr6+eq - BRANCH_TO_COMMON(r10, slb_miss_common) +EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480); EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) - mr r12,r3 /* save r3 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - mfspr r11,SPRN_SRR1 - crclr 4*cr6+eq - BRANCH_TO_COMMON(r10, slb_miss_common) +EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480); EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) -TRAMP_KVM(PACA_EXSLB, 0x480) - - -/* - * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as - * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled. - */ -EXC_COMMON_BEGIN(slb_miss_common) - /* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contains the saved r3, - * r11 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss - * We assume we aren't going to take any exceptions during this - * procedure. - */ - mflr r10 - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - andi. r9,r11,MSR_PR // Check for exception from userspace - cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later - - /* - * Test MSR_RI before calling slb_allocate_realmode, because the - * MSR in r11 gets clobbered. However we still want to allocate - * SLB in case MSR_RI=0, to minimise the risk of getting stuck in - * recursive SLB faults. So use cr5 for this, which is preserved. - */ - andi. r11,r11,MSR_RI /* check for unrecoverable exception */ - cmpdi cr5,r11,MSR_RI - - crset 4*cr0+eq -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_MMU_FTR_SECTION - bl slb_allocate -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) -#endif - - ld r10,PACA_EXSLB+EX_LR(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - mtlr r10 - - /* - * Large address, check whether we have to allocate new contexts. - */ - beq- 8f - bne- cr5,2f /* if unrecoverable exception, oops */ - - /* All done -- return from exception. */ - - bne cr4,1f /* returning to kernel */ - - mtcrf 0x80,r9 - mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ - mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ - mtcrf 0x02,r9 /* I/D indication is in cr6 */ - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ - - RESTORE_CTR(r9, PACA_EXSLB) - RESTORE_PPR_PACA(PACA_EXSLB, r9) - mr r3,r12 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - RFI_TO_USER - b . /* prevent speculative execution */ -1: - mtcrf 0x80,r9 - mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ - mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ - mtcrf 0x02,r9 /* I/D indication is in cr6 */ - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ - - RESTORE_CTR(r9, PACA_EXSLB) - RESTORE_PPR_PACA(PACA_EXSLB, r9) - mr r3,r12 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - RFI_TO_KERNEL - b . /* prevent speculative execution */ - - -2: std r3,PACA_EXSLB+EX_DAR(r13) - mr r3,r12 - mfspr r11,SPRN_SRR0 - mfspr r12,SPRN_SRR1 - LOAD_HANDLER(r10,unrecov_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . - -8: std r3,PACA_EXSLB+EX_DAR(r13) - mr r3,r12 - mfspr r11,SPRN_SRR0 - mfspr r12,SPRN_SRR1 - LOAD_HANDLER(r10, large_addr_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . +TRAMP_KVM(PACA_EXSLB, 0x480) -EXC_COMMON_BEGIN(unrecov_slb) - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - RECONCILE_IRQ_STATE(r10, r11) +EXC_COMMON_BEGIN(instruction_access_slb_common) + EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB) + ld r4,_NIP(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_slb_fault + cmpdi r3,0 + bne- 1f + b fast_exception_return +1: /* Error case */ + std r3,RESULT(r1) bl save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b - -EXC_COMMON_BEGIN(large_addr_slb) - EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) - ld r3, PACA_EXSLB+EX_DAR(r13) - std r3, _DAR(r1) - beq cr6, 2f - li r10, 0x481 /* fix trap number for I-SLB miss */ - std r10, _TRAP(r1) -2: bl save_nvgprs - addi r3, r1, STACK_FRAME_OVERHEAD - bl slb_miss_large_addr + ld r4,_NIP(r1) + ld r5,RESULT(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl do_bad_slb_fault b ret_from_except + EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index a711d22339ea..761b28b1427d 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1444,8 +1444,8 @@ static ssize_t fadump_register_store(struct kobject *kobj, break; case 1: if (fw_dump.dump_registered == 1) { - ret = -EEXIST; - goto unlock_out; + /* Un-register Firmware-assisted dump */ + fadump_unregister_dump(&fdm); } /* Register Firmware-assisted dump */ ret = register_fadump(); diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6582f824d620..134a573a9f2d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -642,7 +642,7 @@ DTLBMissIMMR: mtspr SPRN_MD_TWC, r10 mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -660,7 +660,7 @@ DTLBMissLinear: li r11, MD_PS8MEG | MD_SVALID | M_APG2 mtspr SPRN_MD_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -679,7 +679,7 @@ ITLBMissLinear: li r11, MI_PS8MEG | MI_SVALID | M_APG2 mtspr SPRN_MI_TWC, r11 rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ - ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \ + ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \ _PAGE_PRESENT mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index aa9f1b8261db..7e89d02a84e1 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -153,10 +153,10 @@ static const struct ppc_pci_io iowa_pci_io = { #ifdef CONFIG_PPC_INDIRECT_MMIO static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, - unsigned long flags, void *caller) + pgprot_t prot, void *caller) { struct iowa_bus *bus; - void __iomem *res = __ioremap_caller(addr, size, flags, caller); + void __iomem *res = __ioremap_caller(addr, size, prot, caller); int busno; bus = iowa_pci_find(0, (unsigned long)addr); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 19b4c628f3be..f0dc680e659a 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -785,9 +785,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); if (tbl) { + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); align = 0; if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 1df6c74aa731..fda3ae48480c 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -110,14 +110,14 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node, size = 0x10000; __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - size, pgprot_val(pgprot_noncached(__pgprot(0)))); + size, pgprot_noncached(PAGE_KERNEL)); return; inval_range: printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " "mapping 64k\n"); __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - 0x10000, pgprot_val(pgprot_noncached(__pgprot(0)))); + 0x10000, pgprot_noncached(PAGE_KERNEL)); } @@ -253,7 +253,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np) */ isa_io_base = ISA_IO_BASE; __ioremap_at(pbase, (void *)ISA_IO_BASE, - size, pgprot_val(pgprot_noncached(__pgprot(0)))); + size, pgprot_noncached(PAGE_KERNEL)); pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 35e240a0a408..59c578f865aa 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -24,6 +24,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> +#include <asm/code-patching.h> #include <linux/slab.h> /* @@ -144,7 +145,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0) return 0; - if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) + if (*(u32 *)regs->nip == BREAK_INSTR) regs->nip += BREAK_INSTR_SIZE; return 1; @@ -441,16 +442,42 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, return -1; } +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + unsigned int instr; + unsigned int *addr = (unsigned int *)bpt->bpt_addr; + + err = probe_kernel_address(addr, instr); + if (err) + return err; + + err = patch_instruction(addr, BREAK_INSTR); + if (err) + return -EFAULT; + + *(unsigned int *)bpt->saved_instr = instr; + + return 0; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + unsigned int instr = *(unsigned int *)bpt->saved_instr; + unsigned int *addr = (unsigned int *)bpt->bpt_addr; + + err = patch_instruction(addr, instr); + if (err) + return -EFAULT; + + return 0; +} + /* * Global data */ -struct kgdb_arch arch_kgdb_ops = { -#ifdef __LITTLE_ENDIAN__ - .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d}, -#else - .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, -#endif -}; +struct kgdb_arch arch_kgdb_ops; static int kgdb_not_implemented(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index efdd16a79075..bd933a75f0bc 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -488,10 +488,11 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __this_cpu_inc(irq_stat.mce_exceptions); - - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) - handled = cur_cpu_spec->machine_check_early(regs); + /* + * See if platform is capable of handling machine check. + */ + if (ppc_md.machine_check_early) + handled = ppc_md.machine_check_early(regs); return handled; } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 3497c8329c1d..6b800eec31f2 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -60,7 +60,7 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 -static void flush_and_reload_slb(void) +void flush_and_reload_slb(void) { /* Invalidate all SLBs */ slb_flush_all_realmode(); @@ -89,6 +89,13 @@ static void flush_and_reload_slb(void) static void flush_erat(void) { +#ifdef CONFIG_PPC_BOOK3S_64 + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + flush_and_reload_slb(); + return; + } +#endif + /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */ asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 77371c9ef3d8..2d861a36662e 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -74,6 +74,14 @@ int module_finalize(const Elf_Ehdr *hdr, (void *)sect->sh_addr + sect->sh_size); #endif /* CONFIG_PPC64 */ +#ifdef PPC64_ELF_ABI_v1 + sect = find_section(hdr, sechdrs, ".opd"); + if (sect != NULL) { + me->arch.start_opd = sect->sh_addr; + me->arch.end_opd = sect->sh_addr + sect->sh_size; + } +#endif /* PPC64_ELF_ABI_v1 */ + #ifdef CONFIG_PPC_BARRIER_NOSPEC sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); if (sect != NULL) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index b8d61e019d06..8661eea78503 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -360,11 +360,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); - else if (!strcmp(secstrings + sechdrs[i].sh_name, ".opd")) { - me->arch.start_opd = sechdrs[i].sh_addr; - me->arch.end_opd = sechdrs[i].sh_addr + - sechdrs[i].sh_size; - } /* We don't handle .init for the moment: rename to _init */ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init"))) @@ -685,7 +680,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_PPC64_REL32: /* 32 bits relative (used by relative exception tables) */ - *(u32 *)location = value - (unsigned long)location; + /* Convert value to relative */ + value -= (unsigned long)location; + if (value + 0x80000000 > 0xffffffff) { + pr_err("%s: REL32 %li out of range!\n", + me->name, (long int)value); + return -ENOEXEC; + } + *(u32 *)location = value; break; case R_PPC64_TOCSAVE: diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index d63b488d34d7..4da8ed576229 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -17,7 +17,6 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/export.h> -#include <linux/syscalls.h> #include <asm/processor.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index dff28f903512..9d8c10d55407 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -159,7 +159,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) /* Establish the mapping */ if (__ioremap_at(phys_page, area->addr, size_page, - pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL) + pgprot_noncached(PAGE_KERNEL)) == NULL) return -ENOMEM; /* Fixup hose IO resource */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5d983d8bac27..4d5322cfad25 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -43,6 +43,7 @@ #include <linux/uaccess.h> #include <linux/elf-randomize.h> #include <linux/pkeys.h> +#include <linux/seq_buf.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -65,6 +66,7 @@ #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> +#include <asm/stacktrace.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -102,24 +104,18 @@ static void check_if_tm_restore_required(struct task_struct *tsk) } } -static inline bool msr_tm_active(unsigned long msr) -{ - return MSR_TM_ACTIVE(msr); -} - static bool tm_active_with_fp(struct task_struct *tsk) { - return msr_tm_active(tsk->thread.regs->msr) && + return MSR_TM_ACTIVE(tsk->thread.regs->msr) && (tsk->thread.ckpt_regs.msr & MSR_FP); } static bool tm_active_with_altivec(struct task_struct *tsk) { - return msr_tm_active(tsk->thread.regs->msr) && + return MSR_TM_ACTIVE(tsk->thread.regs->msr) && (tsk->thread.ckpt_regs.msr & MSR_VEC); } #else -static inline bool msr_tm_active(unsigned long msr) { return false; } static inline void check_if_tm_restore_required(struct task_struct *tsk) { } static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; } static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; } @@ -247,7 +243,8 @@ void enable_kernel_fp(void) * giveup as this would save to the 'live' structure not the * checkpointed structure. */ - if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + if (!MSR_TM_ACTIVE(cpumsr) && + MSR_TM_ACTIVE(current->thread.regs->msr)) return; __giveup_fpu(current); } @@ -311,7 +308,8 @@ void enable_kernel_altivec(void) * giveup as this would save to the 'live' structure not the * checkpointed structure. */ - if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + if (!MSR_TM_ACTIVE(cpumsr) && + MSR_TM_ACTIVE(current->thread.regs->msr)) return; __giveup_altivec(current); } @@ -397,7 +395,8 @@ void enable_kernel_vsx(void) * giveup as this would save to the 'live' structure not the * checkpointed structure. */ - if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + if (!MSR_TM_ACTIVE(cpumsr) && + MSR_TM_ACTIVE(current->thread.regs->msr)) return; __giveup_vsx(current); } @@ -530,7 +529,7 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - if (!msr_tm_active(regs->msr) && + if (!MSR_TM_ACTIVE(regs->msr) && !current->thread.load_fp && !loadvec(current->thread)) return; @@ -1252,17 +1251,16 @@ struct task_struct *__switch_to(struct task_struct *prev, return last; } -static int instructions_to_print = 16; +#define NR_INSN_TO_PRINT 16 static void show_instructions(struct pt_regs *regs) { int i; - unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 * - sizeof(int)); + unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); - for (i = 0; i < instructions_to_print; i++) { + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) @@ -1277,7 +1275,7 @@ static void show_instructions(struct pt_regs *regs) #endif if (!__kernel_text_address(pc) || - probe_kernel_address((unsigned int __user *)pc, instr)) { + probe_kernel_address((const void *)pc, instr)) { pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) @@ -1295,43 +1293,43 @@ static void show_instructions(struct pt_regs *regs) void show_user_instructions(struct pt_regs *regs) { unsigned long pc; - int i; + int n = NR_INSN_TO_PRINT; + struct seq_buf s; + char buf[96]; /* enough for 8 times 9 + 2 chars */ - pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); /* * Make sure the NIP points at userspace, not kernel text/data or * elsewhere. */ - if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { + if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) { pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", current->comm, current->pid); return; } - pr_info("%s[%d]: code: ", current->comm, current->pid); + seq_buf_init(&s, buf, sizeof(buf)); - for (i = 0; i < instructions_to_print; i++) { - int instr; + while (n) { + int i; - if (!(i % 8) && (i > 0)) { - pr_cont("\n"); - pr_info("%s[%d]: code: ", current->comm, current->pid); - } + seq_buf_clear(&s); - if (probe_kernel_address((unsigned int __user *)pc, instr)) { - pr_cont("XXXXXXXX "); - } else { - if (regs->nip == pc) - pr_cont("<%08x> ", instr); - else - pr_cont("%08x ", instr); + for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { + int instr; + + if (probe_kernel_address((const void *)pc, instr)) { + seq_buf_printf(&s, "XXXXXXXX "); + continue; + } + seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr); } - pc += sizeof(int); + if (!seq_buf_has_overflowed(&s)) + pr_info("%s[%d]: code: %s\n", current->comm, + current->pid, s.buffer); } - - pr_cont("\n"); } struct regbit { @@ -1485,6 +1483,15 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } +#ifdef CONFIG_PPC_BOOK3S_64 +void arch_setup_new_exec(void) +{ + if (radix_enabled()) + return; + hash__setup_new_exec(); +} +#endif + int set_thread_uses_vas(void) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -1705,7 +1712,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.dscr = mfspr(SPRN_DSCR); } if (cpu_has_feature(CPU_FTR_HAS_PPR)) - p->thread.ppr = INIT_PPR; + childregs->ppr = DEFAULT_PPR; p->thread.tidr = 0; #endif @@ -1713,6 +1720,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } +void preload_new_slb_context(unsigned long start, unsigned long sp); + /* * Set up a thread for executing a new program */ @@ -1720,6 +1729,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) { #ifdef CONFIG_PPC64 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ + +#ifdef CONFIG_PPC_BOOK3S_64 + preload_new_slb_context(start, sp); +#endif #endif /* @@ -1810,6 +1823,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #ifdef CONFIG_VSX current->thread.used_vsr = 0; #endif + current->thread.load_slb = 0; current->thread.load_fp = 0; memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 9b38a2e5dd35..f33ff4163a51 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -43,11 +43,13 @@ #include <asm/btext.h> #include <asm/sections.h> #include <asm/machdep.h> -#include <asm/opal.h> #include <asm/asm-prototypes.h> #include <linux/linux_logo.h> +/* All of prom_init bss lives here */ +#define __prombss __section(.bss.prominit) + /* * Eventually bump that one up */ @@ -87,7 +89,7 @@ #define OF_WORKAROUNDS 0 #else #define OF_WORKAROUNDS of_workarounds -int of_workarounds; +static int of_workarounds __prombss; #endif #define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */ @@ -148,29 +150,31 @@ extern void copy_and_flush(unsigned long dest, unsigned long src, unsigned long size, unsigned long offset); /* prom structure */ -static struct prom_t __initdata prom; +static struct prom_t __prombss prom; -static unsigned long prom_entry __initdata; +static unsigned long __prombss prom_entry; #define PROM_SCRATCH_SIZE 256 -static char __initdata of_stdout_device[256]; -static char __initdata prom_scratch[PROM_SCRATCH_SIZE]; +static char __prombss of_stdout_device[256]; +static char __prombss prom_scratch[PROM_SCRATCH_SIZE]; -static unsigned long __initdata dt_header_start; -static unsigned long __initdata dt_struct_start, dt_struct_end; -static unsigned long __initdata dt_string_start, dt_string_end; +static unsigned long __prombss dt_header_start; +static unsigned long __prombss dt_struct_start, dt_struct_end; +static unsigned long __prombss dt_string_start, dt_string_end; -static unsigned long __initdata prom_initrd_start, prom_initrd_end; +static unsigned long __prombss prom_initrd_start, prom_initrd_end; #ifdef CONFIG_PPC64 -static int __initdata prom_iommu_force_on; -static int __initdata prom_iommu_off; -static unsigned long __initdata prom_tce_alloc_start; -static unsigned long __initdata prom_tce_alloc_end; +static int __prombss prom_iommu_force_on; +static int __prombss prom_iommu_off; +static unsigned long __prombss prom_tce_alloc_start; +static unsigned long __prombss prom_tce_alloc_end; #endif -static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); +#ifdef CONFIG_PPC_PSERIES +static bool __prombss prom_radix_disable; +#endif struct platform_support { bool hash_mmu; @@ -188,26 +192,25 @@ struct platform_support { #define PLATFORM_LPAR 0x0001 #define PLATFORM_POWERMAC 0x0400 #define PLATFORM_GENERIC 0x0500 -#define PLATFORM_OPAL 0x0600 -static int __initdata of_platform; +static int __prombss of_platform; -static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; +static char __prombss prom_cmd_line[COMMAND_LINE_SIZE]; -static unsigned long __initdata prom_memory_limit; +static unsigned long __prombss prom_memory_limit; -static unsigned long __initdata alloc_top; -static unsigned long __initdata alloc_top_high; -static unsigned long __initdata alloc_bottom; -static unsigned long __initdata rmo_top; -static unsigned long __initdata ram_top; +static unsigned long __prombss alloc_top; +static unsigned long __prombss alloc_top_high; +static unsigned long __prombss alloc_bottom; +static unsigned long __prombss rmo_top; +static unsigned long __prombss ram_top; -static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; -static int __initdata mem_reserve_cnt; +static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE]; +static int __prombss mem_reserve_cnt; -static cell_t __initdata regbuf[1024]; +static cell_t __prombss regbuf[1024]; -static bool rtas_has_query_cpu_stopped; +static bool __prombss rtas_has_query_cpu_stopped; /* @@ -522,8 +525,8 @@ static void add_string(char **str, const char *q) static char *tohex(unsigned int x) { - static char digits[] = "0123456789abcdef"; - static char result[9]; + static const char digits[] __initconst = "0123456789abcdef"; + static char result[9] __prombss; int i; result[8] = 0; @@ -664,6 +667,8 @@ static void __init early_cmdline_parse(void) #endif } +#ifdef CONFIG_PPC_PSERIES + prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); opt = strstr(prom_cmd_line, "disable_radix"); if (opt) { opt += 13; @@ -679,9 +684,10 @@ static void __init early_cmdline_parse(void) } if (prom_radix_disable) prom_debug("Radix disabled from cmdline\n"); +#endif /* CONFIG_PPC_PSERIES */ } -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +#ifdef CONFIG_PPC_PSERIES /* * The architecture vector has an array of PVR mask/value pairs, * followed by # option vectors - 1, followed by the option vectors. @@ -782,7 +788,7 @@ struct ibm_arch_vec { struct option_vector6 vec6; } __packed; -struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { +static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .pvrs = { { .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */ @@ -920,9 +926,11 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { }, }; +static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned; + /* Old method - ELF header with PT_NOTE sections only works on BE */ #ifdef __BIG_ENDIAN__ -static struct fake_elf { +static const struct fake_elf { Elf32_Ehdr elfhdr; Elf32_Phdr phdr[2]; struct chrpnote { @@ -955,7 +963,7 @@ static struct fake_elf { u32 ignore_me; } rpadesc; } rpanote; -} fake_elf = { +} fake_elf __initconst = { .elfhdr = { .e_ident = { 0x7f, 'E', 'L', 'F', ELFCLASS32, ELFDATA2MSB, EV_CURRENT }, @@ -1129,14 +1137,21 @@ static void __init prom_check_platform_support(void) }; int prop_len = prom_getproplen(prom.chosen, "ibm,arch-vec-5-platform-support"); + + /* First copy the architecture vec template */ + ibm_architecture_vec = ibm_architecture_vec_template; + if (prop_len > 1) { int i; - u8 vec[prop_len]; + u8 vec[8]; prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", prop_len); + if (prop_len > sizeof(vec)) + prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", + prop_len); prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); - for (i = 0; i < prop_len; i += 2) { + for (i = 0; i < sizeof(vec); i += 2) { prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 , vec[i] , vec[i + 1]); @@ -1225,7 +1240,7 @@ static void __init prom_send_capabilities(void) } #endif /* __BIG_ENDIAN__ */ } -#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ +#endif /* CONFIG_PPC_PSERIES */ /* * Memory allocation strategy... our layout is normally: @@ -1562,88 +1577,6 @@ static void __init prom_close_stdin(void) } } -#ifdef CONFIG_PPC_POWERNV - -#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL -static u64 __initdata prom_opal_base; -static u64 __initdata prom_opal_entry; -#endif - -/* - * Allocate room for and instantiate OPAL - */ -static void __init prom_instantiate_opal(void) -{ - phandle opal_node; - ihandle opal_inst; - u64 base, entry; - u64 size = 0, align = 0x10000; - __be64 val64; - u32 rets[2]; - - prom_debug("prom_instantiate_opal: start...\n"); - - opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal")); - prom_debug("opal_node: %x\n", opal_node); - if (!PHANDLE_VALID(opal_node)) - return; - - val64 = 0; - prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64)); - size = be64_to_cpu(val64); - if (size == 0) - return; - val64 = 0; - prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64)); - align = be64_to_cpu(val64); - - base = alloc_down(size, align, 0); - if (base == 0) { - prom_printf("OPAL allocation failed !\n"); - return; - } - - opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal")); - if (!IHANDLE_VALID(opal_inst)) { - prom_printf("opening opal package failed (%x)\n", opal_inst); - return; - } - - prom_printf("instantiating opal at 0x%llx...", base); - - if (call_prom_ret("call-method", 4, 3, rets, - ADDR("load-opal-runtime"), - opal_inst, - base >> 32, base & 0xffffffff) != 0 - || (rets[0] == 0 && rets[1] == 0)) { - prom_printf(" failed\n"); - return; - } - entry = (((u64)rets[0]) << 32) | rets[1]; - - prom_printf(" done\n"); - - reserve_mem(base, size); - - prom_debug("opal base = 0x%llx\n", base); - prom_debug("opal align = 0x%llx\n", align); - prom_debug("opal entry = 0x%llx\n", entry); - prom_debug("opal size = 0x%llx\n", size); - - prom_setprop(opal_node, "/ibm,opal", "opal-base-address", - &base, sizeof(base)); - prom_setprop(opal_node, "/ibm,opal", "opal-entry-address", - &entry, sizeof(entry)); - -#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL - prom_opal_base = base; - prom_opal_entry = entry; -#endif - prom_debug("prom_instantiate_opal: end...\n"); -} - -#endif /* CONFIG_PPC_POWERNV */ - /* * Allocate room for and instantiate RTAS */ @@ -2150,10 +2083,6 @@ static int __init prom_find_machine_type(void) } } #ifdef CONFIG_PPC64 - /* Try to detect OPAL */ - if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal")))) - return PLATFORM_OPAL; - /* Try to figure out if it's an IBM pSeries or any other * PAPR compliant platform. We assume it is if : * - /device_type is "chrp" (please, do NOT use that for future @@ -2202,7 +2131,7 @@ static void __init prom_check_displays(void) ihandle ih; int i; - static unsigned char default_colors[] = { + static const unsigned char default_colors[] __initconst = { 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0x00, 0xaa, 0x00, @@ -2398,7 +2327,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; unsigned long soff; unsigned char *valp; - static char pname[MAX_PROPERTY_NAME]; + static char pname[MAX_PROPERTY_NAME] __prombss; int l, room, has_phandle = 0; dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); @@ -2481,14 +2410,11 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, has_phandle = 1; } - /* Add a "linux,phandle" property if no "phandle" property already - * existed (can happen with OPAL) - */ + /* Add a "phandle" property if none already exist */ if (!has_phandle) { - soff = dt_find_string("linux,phandle"); + soff = dt_find_string("phandle"); if (soff == 0) - prom_printf("WARNING: Can't find string index for" - " <linux-phandle> node %s\n", path); + prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path); else { dt_push_token(OF_DT_PROP, mem_start, mem_end); dt_push_token(4, mem_start, mem_end); @@ -2548,9 +2474,9 @@ static void __init flatten_device_tree(void) dt_string_start = mem_start; mem_start += 4; /* hole */ - /* Add "linux,phandle" in there, we'll need it */ + /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - strcpy(namep, "linux,phandle"); + strcpy(namep, "phandle"); mem_start = (unsigned long)namep + strlen(namep) + 1; /* Build string array */ @@ -3172,7 +3098,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ early_cmdline_parse(); -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +#ifdef CONFIG_PPC_PSERIES /* * On pSeries, inform the firmware about our capabilities */ @@ -3216,15 +3142,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, try to instantiate RTAS. PowerMacs don't * have a usable RTAS implementation. */ - if (of_platform != PLATFORM_POWERMAC && - of_platform != PLATFORM_OPAL) + if (of_platform != PLATFORM_POWERMAC) prom_instantiate_rtas(); -#ifdef CONFIG_PPC_POWERNV - if (of_platform == PLATFORM_OPAL) - prom_instantiate_opal(); -#endif /* CONFIG_PPC_POWERNV */ - #ifdef CONFIG_PPC64 /* instantiate sml */ prom_instantiate_sml(); @@ -3237,8 +3157,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * * (This must be done after instanciating RTAS) */ - if (of_platform != PLATFORM_POWERMAC && - of_platform != PLATFORM_OPAL) + if (of_platform != PLATFORM_POWERMAC) prom_hold_cpus(); /* @@ -3282,11 +3201,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * in case stdin is USB and still active on IBM machines... * Unfortunately quiesce crashes on some powermacs if we have - * closed stdin already (in particular the powerbook 101). It - * appears that the OPAL version of OFW doesn't like it either. + * closed stdin already (in particular the powerbook 101). */ - if (of_platform != PLATFORM_POWERMAC && - of_platform != PLATFORM_OPAL) + if (of_platform != PLATFORM_POWERMAC) prom_close_stdin(); /* @@ -3304,10 +3221,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, hdr = dt_header_start; /* Don't print anything after quiesce under OPAL, it crashes OFW */ - if (of_platform != PLATFORM_OPAL) { - prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); - prom_debug("->dt_header_start=0x%lx\n", hdr); - } + prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); + prom_debug("->dt_header_start=0x%lx\n", hdr); #ifdef CONFIG_PPC32 reloc_got2(-offset); @@ -3315,13 +3230,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unreloc_toc(); #endif -#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL - /* OPAL early debug gets the OPAL base & entry in r8 and r9 */ - __start(hdr, kbase, 0, 0, 0, - prom_opal_base, prom_opal_entry); -#else __start(hdr, kbase, 0, 0, 0, 0, 0); -#endif return 0; } diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index acb6b9226352..667df97d2595 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -28,6 +28,18 @@ OBJ="$2" ERROR=0 +function check_section() +{ + file=$1 + section=$2 + size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") + size=${size:-0} + if [ $size -ne 0 ]; then + ERROR=1 + echo "Error: Section $section not empty in prom_init.c" >&2 + fi +} + for UNDEF in $($NM -u $OBJ | awk '{print $2}') do # On 64-bit nm gives us the function descriptors, which have @@ -66,4 +78,8 @@ do fi done +check_section $OBJ .data +check_section $OBJ .bss +check_section $OBJ .init.data + exit $ERROR diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 9667666eb18e..afb819f4ca68 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -297,7 +297,7 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) } #endif - if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { + if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) { *data = ((unsigned long *)task->thread.regs)[regno]; return 0; } @@ -360,10 +360,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.regs->orig_gpr3, offsetof(struct pt_regs, orig_gpr3), - sizeof(struct pt_regs)); + sizeof(struct user_pt_regs)); if (!ret) ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct pt_regs), -1); + sizeof(struct user_pt_regs), -1); return ret; } @@ -853,10 +853,10 @@ static int tm_cgpr_get(struct task_struct *target, ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckpt_regs.orig_gpr3, offsetof(struct pt_regs, orig_gpr3), - sizeof(struct pt_regs)); + sizeof(struct user_pt_regs)); if (!ret) ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - sizeof(struct pt_regs), -1); + sizeof(struct user_pt_regs), -1); return ret; } @@ -1609,7 +1609,7 @@ static int ppr_get(struct task_struct *target, void *kbuf, void __user *ubuf) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.ppr, 0, sizeof(u64)); + &target->thread.regs->ppr, 0, sizeof(u64)); } static int ppr_set(struct task_struct *target, @@ -1618,7 +1618,7 @@ static int ppr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.ppr, 0, sizeof(u64)); + &target->thread.regs->ppr, 0, sizeof(u64)); } static int dscr_get(struct task_struct *target, @@ -2508,6 +2508,7 @@ void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ user_disable_single_step(child); + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -3130,7 +3131,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_GETREGS: /* Get all pt_regs from the child. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, - 0, sizeof(struct pt_regs), + 0, sizeof(struct user_pt_regs), datavp); #ifdef CONFIG_PPC64 @@ -3139,7 +3140,7 @@ long arch_ptrace(struct task_struct *child, long request, case PTRACE_SETREGS: /* Set all gp regs in the child. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, - 0, sizeof(struct pt_regs), + 0, sizeof(struct user_pt_regs), datavp); case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ @@ -3264,6 +3265,16 @@ long do_syscall_trace_enter(struct pt_regs *regs) { user_exit(); + if (test_thread_flag(TIF_SYSCALL_EMU)) { + ptrace_report_syscall(regs); + /* + * Returning -1 will skip the syscall execution. We want to + * avoid clobbering any register also, thus, not 'gotoing' + * skip label. + */ + return -1; + } + /* * The tracer may decide to abort the syscall, if so tracehook * will return !0. Note that the tracer may also just change @@ -3324,3 +3335,42 @@ void do_syscall_trace_leave(struct pt_regs *regs) user_enter(); } + +void __init pt_regs_check(void) +{ + BUILD_BUG_ON(offsetof(struct pt_regs, gpr) != + offsetof(struct user_pt_regs, gpr)); + BUILD_BUG_ON(offsetof(struct pt_regs, nip) != + offsetof(struct user_pt_regs, nip)); + BUILD_BUG_ON(offsetof(struct pt_regs, msr) != + offsetof(struct user_pt_regs, msr)); + BUILD_BUG_ON(offsetof(struct pt_regs, msr) != + offsetof(struct user_pt_regs, msr)); + BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != + offsetof(struct user_pt_regs, orig_gpr3)); + BUILD_BUG_ON(offsetof(struct pt_regs, ctr) != + offsetof(struct user_pt_regs, ctr)); + BUILD_BUG_ON(offsetof(struct pt_regs, link) != + offsetof(struct user_pt_regs, link)); + BUILD_BUG_ON(offsetof(struct pt_regs, xer) != + offsetof(struct user_pt_regs, xer)); + BUILD_BUG_ON(offsetof(struct pt_regs, ccr) != + offsetof(struct user_pt_regs, ccr)); +#ifdef __powerpc64__ + BUILD_BUG_ON(offsetof(struct pt_regs, softe) != + offsetof(struct user_pt_regs, softe)); +#else + BUILD_BUG_ON(offsetof(struct pt_regs, mq) != + offsetof(struct user_pt_regs, mq)); +#endif + BUILD_BUG_ON(offsetof(struct pt_regs, trap) != + offsetof(struct user_pt_regs, trap)); + BUILD_BUG_ON(offsetof(struct pt_regs, dar) != + offsetof(struct user_pt_regs, dar)); + BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) != + offsetof(struct user_pt_regs, dsisr)); + BUILD_BUG_ON(offsetof(struct pt_regs, result) != + offsetof(struct user_pt_regs, result)); + + BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); +} diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8afd146bc9c7..de35bd8f047f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -981,7 +981,15 @@ int rtas_ibm_suspend_me(u64 handle) goto out; } - stop_topology_update(); + cpu_hotplug_disable(); + + /* Check if we raced with a CPU-Offline Operation */ + if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) { + pr_err("%s: Raced against a concurrent CPU-Offline\n", + __func__); + atomic_set(&data.error, -EBUSY); + goto out_hotplug_enable; + } /* Call function on all CPUs. One of us will make the * rtas call @@ -994,7 +1002,8 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); - start_topology_update(); +out_hotplug_enable: + cpu_hotplug_enable(); /* Take down CPUs not online prior to suspend */ cpuret = rtas_offline_cpus_mask(offline_mask); diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 44d66c33d59d..38cadae4ca4f 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -91,6 +91,8 @@ static char *rtas_event_type(int type) return "Dump Notification Event"; case RTAS_TYPE_PRRN: return "Platform Resource Reassignment Event"; + case RTAS_TYPE_HOTPLUG: + return "Hotplug Event"; } return rtas_type[0]; @@ -150,8 +152,10 @@ static void printk_log_rtas(char *buf, int len) } else { struct rtas_error_log *errlog = (struct rtas_error_log *)buf; - printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", - error_log_cnt, rtas_event_type(rtas_error_type(errlog)), + printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n", + error_log_cnt, + rtas_event_type(rtas_error_type(errlog)), + rtas_error_type(errlog), rtas_error_severity(errlog)); } } @@ -274,27 +278,16 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) } #ifdef CONFIG_PPC_PSERIES -static s32 prrn_update_scope; - -static void prrn_work_fn(struct work_struct *work) +static void handle_prrn_event(s32 scope) { /* * For PRRN, we must pass the negative of the scope value in * the RTAS event. */ - pseries_devicetree_update(-prrn_update_scope); + pseries_devicetree_update(-scope); numa_update_cpu_topology(false); } -static DECLARE_WORK(prrn_work, prrn_work_fn); - -static void prrn_schedule_update(u32 scope) -{ - flush_work(&prrn_work); - prrn_update_scope = scope; - schedule_work(&prrn_work); -} - static void handle_rtas_event(const struct rtas_error_log *log) { if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) @@ -303,7 +296,7 @@ static void handle_rtas_event(const struct rtas_error_log *log) /* For PRRN Events the extended log length is used to denote * the scope for calling rtas update-nodes. */ - prrn_schedule_update(rtas_error_extended_log_length(log)); + handle_prrn_event(rtas_error_extended_log_length(log)); } #else diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 93fa0c99681e..9ca9db707bcb 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,6 +33,7 @@ #include <linux/serial_8250.h> #include <linux/percpu.h> #include <linux/memblock.h> +#include <linux/bootmem.h> #include <linux/of_platform.h> #include <linux/hugetlb.h> #include <asm/debugfs.h> @@ -966,6 +967,8 @@ void __init setup_arch(char **cmdline_p) initmem_init(); + early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); + #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6a501b25dd85..faf00222b324 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -243,13 +243,19 @@ static void cpu_ready_for_interrupts(void) } /* - * Fixup HFSCR:TM based on CPU features. The bit is set by our - * early asm init because at that point we haven't updated our - * CPU features from firmware and device-tree. Here we have, - * so let's do it. + * Set HFSCR:TM based on CPU features: + * In the special case of TM no suspend (P9N DD2.1), Linux is + * told TM is off via the dt-ftrs but told to (partially) use + * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM] + * will be off from dt-ftrs but we need to turn it on for the + * no suspend case. */ - if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) - mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_TM_COMP)) + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM); + else + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); + } /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 61c1fadbc644..3f15edf25a0d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -34,6 +34,8 @@ #include <linux/topology.h> #include <linux/profile.h> #include <linux/processor.h> +#include <linux/random.h> +#include <linux/stackprotector.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -74,14 +76,32 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; #endif struct thread_info *secondary_ti; +bool has_big_cores; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); +EXPORT_SYMBOL_GPL(has_big_cores); + +#define MAX_THREAD_LIST_SIZE 8 +#define THREAD_GROUP_SHARE_L1 1 +struct thread_groups { + unsigned int property; + unsigned int nr_groups; + unsigned int threads_per_group; + unsigned int thread_list[MAX_THREAD_LIST_SIZE]; +}; + +/* + * On big-cores system, cpu_l1_cache_map for each CPU corresponds to + * the set its siblings that share the L1-cache. + */ +DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -674,6 +694,185 @@ static void set_cpus_unrelated(int i, int j, } #endif +/* + * parse_thread_groups: Parses the "ibm,thread-groups" device tree + * property for the CPU device node @dn and stores + * the parsed output in the thread_groups + * structure @tg if the ibm,thread-groups[0] + * matches @property. + * + * @dn: The device node of the CPU device. + * @tg: Pointer to a thread group structure into which the parsed + * output of "ibm,thread-groups" is stored. + * @property: The property of the thread-group that the caller is + * interested in. + * + * ibm,thread-groups[0..N-1] array defines which group of threads in + * the CPU-device node can be grouped together based on the property. + * + * ibm,thread-groups[0] tells us the property based on which the + * threads are being grouped together. If this value is 1, it implies + * that the threads in the same group share L1, translation cache. + * + * ibm,thread-groups[1] tells us how many such thread groups exist. + * + * ibm,thread-groups[2] tells us the number of threads in each such + * group. + * + * ibm,thread-groups[3..N-1] is the list of threads identified by + * "ibm,ppc-interrupt-server#s" arranged as per their membership in + * the grouping. + * + * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it + * implies that there are 2 groups of 4 threads each, where each group + * of threads share L1, translation cache. + * + * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8} + * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10, + * 11, 12} structure + * + * Returns 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + */ +static int parse_thread_groups(struct device_node *dn, + struct thread_groups *tg, + unsigned int property) +{ + int i; + u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE]; + u32 *thread_list; + size_t total_threads; + int ret; + + ret = of_property_read_u32_array(dn, "ibm,thread-groups", + thread_group_array, 3); + if (ret) + return ret; + + tg->property = thread_group_array[0]; + tg->nr_groups = thread_group_array[1]; + tg->threads_per_group = thread_group_array[2]; + if (tg->property != property || + tg->nr_groups < 1 || + tg->threads_per_group < 1) + return -ENODATA; + + total_threads = tg->nr_groups * tg->threads_per_group; + + ret = of_property_read_u32_array(dn, "ibm,thread-groups", + thread_group_array, + 3 + total_threads); + if (ret) + return ret; + + thread_list = &thread_group_array[3]; + + for (i = 0 ; i < total_threads; i++) + tg->thread_list[i] = thread_list[i]; + + return 0; +} + +/* + * get_cpu_thread_group_start : Searches the thread group in tg->thread_list + * that @cpu belongs to. + * + * @cpu : The logical CPU whose thread group is being searched. + * @tg : The thread-group structure of the CPU node which @cpu belongs + * to. + * + * Returns the index to tg->thread_list that points to the the start + * of the thread_group that @cpu belongs to. + * + * Returns -1 if cpu doesn't belong to any of the groups pointed to by + * tg->thread_list. + */ +static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg) +{ + int hw_cpu_id = get_hard_smp_processor_id(cpu); + int i, j; + + for (i = 0; i < tg->nr_groups; i++) { + int group_start = i * tg->threads_per_group; + + for (j = 0; j < tg->threads_per_group; j++) { + int idx = group_start + j; + + if (tg->thread_list[idx] == hw_cpu_id) + return group_start; + } + } + + return -1; +} + +static int init_cpu_l1_cache_map(int cpu) + +{ + struct device_node *dn = of_get_cpu_node(cpu, NULL); + struct thread_groups tg = {.property = 0, + .nr_groups = 0, + .threads_per_group = 0}; + int first_thread = cpu_first_thread_sibling(cpu); + int i, cpu_group_start = -1, err = 0; + + if (!dn) + return -ENODATA; + + err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1); + if (err) + goto out; + + zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), + GFP_KERNEL, + cpu_to_node(cpu)); + + cpu_group_start = get_cpu_thread_group_start(cpu, &tg); + + if (unlikely(cpu_group_start == -1)) { + WARN_ON_ONCE(1); + err = -ENODATA; + goto out; + } + + for (i = first_thread; i < first_thread + threads_per_core; i++) { + int i_group_start = get_cpu_thread_group_start(i, &tg); + + if (unlikely(i_group_start == -1)) { + WARN_ON_ONCE(1); + err = -ENODATA; + goto out; + } + + if (i_group_start == cpu_group_start) + cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); + } + +out: + of_node_put(dn); + return err; +} + +static int init_big_cores(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + int err = init_cpu_l1_cache_map(cpu); + + if (err) + return err; + + zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu), + GFP_KERNEL, + cpu_to_node(cpu)); + } + + has_big_cores = true; + return 0; +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int cpu; @@ -712,6 +911,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + init_big_cores(); + if (has_big_cores) { + cpumask_set_cpu(boot_cpuid, + cpu_smallcore_mask(boot_cpuid)); + } + if (smp_ops && smp_ops->probe) smp_ops->probe(); } @@ -995,10 +1200,28 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_core_mask); set_cpus_unrelated(cpu, i, cpu_l2_cache_mask); set_cpus_unrelated(cpu, i, cpu_sibling_mask); + if (has_big_cores) + set_cpus_unrelated(cpu, i, cpu_smallcore_mask); } } #endif +static inline void add_cpu_to_smallcore_masks(int cpu) +{ + struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu); + int i, first_thread = cpu_first_thread_sibling(cpu); + + if (!has_big_cores) + return; + + cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); + + for (i = first_thread; i < first_thread + threads_per_core; i++) { + if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map)) + set_cpus_related(i, cpu, cpu_smallcore_mask); + } +} + static void add_cpu_to_masks(int cpu) { int first_thread = cpu_first_thread_sibling(cpu); @@ -1015,6 +1238,7 @@ static void add_cpu_to_masks(int cpu) if (cpu_online(i)) set_cpus_related(i, cpu, cpu_sibling_mask); + add_cpu_to_smallcore_masks(cpu); /* * Copy the thread sibling mask into the cache sibling mask * and mark any CPUs that share an L2 with this CPU. @@ -1044,6 +1268,7 @@ static bool shared_caches; void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; mmgrab(&init_mm); current->active_mm = &init_mm; @@ -1069,11 +1294,13 @@ void start_secondary(void *unused) /* Update topology CPU masks */ add_cpu_to_masks(cpu); + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; /* * Check for any shared caches. Note that this must be done on a * per-core basis because one core in the pair might be disabled. */ - if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu))) + if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) shared_caches = true; set_numa_node(numa_cpu_lookup_table[cpu]); @@ -1083,6 +1310,8 @@ void start_secondary(void *unused) notify_cpu_starting(cpu); set_cpu_online(cpu, true); + boot_init_stack_canary(); + local_irq_enable(); /* We can enable ftrace for secondary cpus now */ @@ -1140,6 +1369,13 @@ static const struct cpumask *shared_cache_mask(int cpu) return cpu_l2_cache_mask(cpu); } +#ifdef CONFIG_SCHED_SMT +static const struct cpumask *smallcore_smt_mask(int cpu) +{ + return cpu_smallcore_mask(cpu); +} +#endif + static struct sched_domain_topology_level power9_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, @@ -1167,6 +1403,13 @@ void __init smp_cpus_done(unsigned int max_cpus) shared_proc_topology_init(); dump_numa_cpu_topology(); +#ifdef CONFIG_SCHED_SMT + if (has_big_cores) { + pr_info("Using small cores at SMT level\n"); + power9_topology[0].mask = smallcore_smt_mask; + powerpc_topology[0].mask = smallcore_smt_mask; + } +#endif /* * If any CPU detects that it's sharing a cache with another CPU then * use the deeper topology that is aware of this sharing. diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index f83bf6f72cb0..185216becb8b 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -262,7 +262,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) addi r1,r1,-128 #ifdef CONFIG_PPC_BOOK3S_64 - bl slb_flush_and_rebolt + bl slb_flush_and_restore_bolted #endif bl do_after_copyback addi r1,r1,128 diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 70f145e02487..3646affae963 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -111,6 +111,7 @@ struct clock_event_device decrementer_clockevent = { .rating = 200, .irq = 0, .set_next_event = decrementer_set_next_event, + .set_state_oneshot_stopped = decrementer_shutdown, .set_state_shutdown = decrementer_shutdown, .tick_resume = decrementer_shutdown, .features = CLOCK_EVT_FEAT_ONESHOT | @@ -175,7 +176,7 @@ static void calc_cputime_factors(void) * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. */ -static unsigned long read_spurr(unsigned long tb) +static inline unsigned long read_spurr(unsigned long tb) { if (cpu_has_feature(CPU_FTR_SPURR)) return mfspr(SPRN_SPURR); @@ -281,26 +282,17 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. */ -static unsigned long vtime_delta(struct task_struct *tsk, - unsigned long *stime_scaled, - unsigned long *steal_time) +static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct, + unsigned long now, unsigned long stime) { - unsigned long now, nowscaled, deltascaled; - unsigned long stime; + unsigned long stime_scaled = 0; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME + unsigned long nowscaled, deltascaled; unsigned long utime, utime_scaled; - struct cpu_accounting_data *acct = get_accounting(tsk); - WARN_ON_ONCE(!irqs_disabled()); - - now = mftb(); nowscaled = read_spurr(now); - stime = now - acct->starttime; - acct->starttime = now; deltascaled = nowscaled - acct->startspurr; acct->startspurr = nowscaled; - - *steal_time = calculate_stolen_time(now); - utime = acct->utime - acct->utime_sspurr; acct->utime_sspurr = acct->utime; @@ -314,17 +306,38 @@ static unsigned long vtime_delta(struct task_struct *tsk, * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - *stime_scaled = stime; + stime_scaled = stime; utime_scaled = utime; if (deltascaled != stime + utime) { if (utime) { - *stime_scaled = deltascaled * stime / (stime + utime); - utime_scaled = deltascaled - *stime_scaled; + stime_scaled = deltascaled * stime / (stime + utime); + utime_scaled = deltascaled - stime_scaled; } else { - *stime_scaled = deltascaled; + stime_scaled = deltascaled; } } acct->utime_scaled += utime_scaled; +#endif + + return stime_scaled; +} + +static unsigned long vtime_delta(struct task_struct *tsk, + unsigned long *stime_scaled, + unsigned long *steal_time) +{ + unsigned long now, stime; + struct cpu_accounting_data *acct = get_accounting(tsk); + + WARN_ON_ONCE(!irqs_disabled()); + + now = mftb(); + stime = now - acct->starttime; + acct->starttime = now; + + *stime_scaled = vtime_delta_scaled(acct, now, stime); + + *steal_time = calculate_stolen_time(now); return stime; } @@ -341,7 +354,9 @@ void vtime_account_system(struct task_struct *tsk) if ((tsk->flags & PF_VCPU) && !irq_count()) { acct->gtime += stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME acct->utime_scaled += stime_scaled; +#endif } else { if (hardirq_count()) acct->hardirq_time += stime; @@ -350,7 +365,9 @@ void vtime_account_system(struct task_struct *tsk) else acct->stime += stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME acct->stime_scaled += stime_scaled; +#endif } } EXPORT_SYMBOL_GPL(vtime_account_system); @@ -364,6 +381,21 @@ void vtime_account_idle(struct task_struct *tsk) acct->idle_time += stime + steal_time; } +static void vtime_flush_scaled(struct task_struct *tsk, + struct cpu_accounting_data *acct) +{ +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME + if (acct->utime_scaled) + tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); + if (acct->stime_scaled) + tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); + + acct->utime_scaled = 0; + acct->utime_sspurr = 0; + acct->stime_scaled = 0; +#endif +} + /* * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. @@ -378,14 +410,13 @@ void vtime_flush(struct task_struct *tsk) if (acct->utime) account_user_time(tsk, cputime_to_nsecs(acct->utime)); - if (acct->utime_scaled) - tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); - if (acct->gtime) account_guest_time(tsk, cputime_to_nsecs(acct->gtime)); - if (acct->steal_time) + if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) { account_steal_time(cputime_to_nsecs(acct->steal_time)); + acct->steal_time = 0; + } if (acct->idle_time) account_idle_time(cputime_to_nsecs(acct->idle_time)); @@ -393,8 +424,6 @@ void vtime_flush(struct task_struct *tsk) if (acct->stime) account_system_index_time(tsk, cputime_to_nsecs(acct->stime), CPUTIME_SYSTEM); - if (acct->stime_scaled) - tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); if (acct->hardirq_time) account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time), @@ -403,14 +432,12 @@ void vtime_flush(struct task_struct *tsk) account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time), CPUTIME_SOFTIRQ); + vtime_flush_scaled(tsk, acct); + acct->utime = 0; - acct->utime_scaled = 0; - acct->utime_sspurr = 0; acct->gtime = 0; - acct->steal_time = 0; acct->idle_time = 0; acct->stime = 0; - acct->stime_scaled = 0; acct->hardirq_time = 0; acct->softirq_time = 0; } @@ -984,10 +1011,14 @@ static void register_decrementer_clockevent(int cpu) *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); + clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max); + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); - clockevents_register_device(dec); + /* Set values for KVM, see kvm_emulate_dec() */ + decrementer_clockevent.mult = dec->mult; + decrementer_clockevent.shift = dec->shift; } static void enable_large_decrementer(void) @@ -1035,18 +1066,7 @@ static void __init set_decrementer_max(void) static void __init init_decrementer_clockevent(void) { - int cpu = smp_processor_id(); - - clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); - - decrementer_clockevent.max_delta_ns = - clockevent_delta2ns(decrementer_max, &decrementer_clockevent); - decrementer_clockevent.max_delta_ticks = decrementer_max; - decrementer_clockevent.min_delta_ns = - clockevent_delta2ns(2, &decrementer_clockevent); - decrementer_clockevent.min_delta_ticks = 2; - - register_decrementer_clockevent(cpu); + register_decrementer_clockevent(smp_processor_id()); } void secondary_cpu_time_init(void) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 7716374786bd..9fabdce255cd 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -92,13 +92,14 @@ _GLOBAL(tm_abort) blr EXPORT_SYMBOL_GPL(tm_abort); -/* void tm_reclaim(struct thread_struct *thread, +/* + * void tm_reclaim(struct thread_struct *thread, * uint8_t cause) * * - Performs a full reclaim. This destroys outstanding - * transactions and updates thread->regs.tm_ckpt_* with the - * original checkpointed state. Note that thread->regs is - * unchanged. + * transactions and updates thread.ckpt_regs, thread.ckfp_state and + * thread.ckvr_state with the original checkpointed state. Note that + * thread->regs is unchanged. * * Purpose is to both abort transactions of, and preserve the state of, * a transactions at a context switch. We preserve/restore both sets of process @@ -163,15 +164,16 @@ _GLOBAL(tm_reclaim) */ TRECLAIM(R4) /* Cause in r4 */ - /* ******************** GPRs ******************** */ - /* Stash the checkpointed r13 away in the scratch SPR and get the real - * paca + /* + * ******************** GPRs ******************** + * Stash the checkpointed r13 in the scratch SPR and get the real paca. */ SET_SCRATCH0(r13) GET_PACA(r13) - /* Stash the checkpointed r1 away in paca tm_scratch and get the real - * stack pointer back + /* + * Stash the checkpointed r1 away in paca->tm_scratch and get the real + * stack pointer back into r1. */ std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) @@ -209,14 +211,15 @@ _GLOBAL(tm_reclaim) addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */ - /* Make r7 look like an exception frame so that we - * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr! + /* + * Make r7 look like an exception frame so that we can use the neat + * GPRx(n) macros. r7 is NOT a pt_regs ptr! */ subi r7, r7, STACK_FRAME_OVERHEAD /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ - SAVE_GPR(2, r7) /* user r2 */ + SAVE_GPR(2, r7) /* user r2 */ SAVE_4GPRS(3, r7) /* user r3-r6 */ SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ @@ -237,7 +240,8 @@ _GLOBAL(tm_reclaim) /* ******************** NIP ******************** */ mfspr r3, SPRN_TFHAR std r3, _NIP(r7) /* Returns to failhandler */ - /* The checkpointed NIP is ignored when rescheduling/rechkpting, + /* + * The checkpointed NIP is ignored when rescheduling/rechkpting, * but is used in signal return to 'wind back' to the abort handler. */ @@ -260,12 +264,13 @@ _GLOBAL(tm_reclaim) std r3, THREAD_TM_TAR(r12) std r4, THREAD_TM_DSCR(r12) - /* MSR and flags: We don't change CRs, and we don't need to alter - * MSR. + /* + * MSR and flags: We don't change CRs, and we don't need to alter MSR. */ - /* ******************** FPR/VR/VSRs ************ + /* + * ******************** FPR/VR/VSRs ************ * After reclaiming, capture the checkpointed FPRs/VRs. * * We enabled VEC/FP/VSX in the msr above, so we can execute these @@ -275,7 +280,7 @@ _GLOBAL(tm_reclaim) /* Altivec (VEC/VMX/VR)*/ addi r7, r3, THREAD_CKVRSTATE - SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ + SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr_state */ mfvscr v0 li r6, VRSTATE_VSCR stvx v0, r7, r6 @@ -286,12 +291,13 @@ _GLOBAL(tm_reclaim) /* Floating Point (FP) */ addi r7, r3, THREAD_CKFPSTATE - SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ + SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp_state */ mffs fr0 stfd fr0,FPSTATE_FPSCR(r7) - /* TM regs, incl TEXASR -- these live in thread_struct. Note they've + /* + * TM regs, incl TEXASR -- these live in thread_struct. Note they've * been updated by the treclaim, to explain to userland the failure * cause (aborted). */ @@ -327,7 +333,7 @@ _GLOBAL(tm_reclaim) blr - /* + /* * void __tm_recheckpoint(struct thread_struct *thread) * - Restore the checkpointed register state saved by tm_reclaim * when we switch_to a process. @@ -343,7 +349,8 @@ _GLOBAL(__tm_recheckpoint) std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) - /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + /* + * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. * This is used for backing up the NVGPRs: */ SAVE_NVGPRS(r1) @@ -352,8 +359,9 @@ _GLOBAL(__tm_recheckpoint) addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */ - /* Make r7 look like an exception frame so that we - * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr! + /* + * Make r7 look like an exception frame so that we can use the neat + * GPRx(n) macros. r7 is now NOT a pt_regs ptr! */ subi r7, r7, STACK_FRAME_OVERHEAD @@ -421,14 +429,15 @@ restore_gprs: REST_NVGPRS(r7) /* GPR14-31 */ - /* Load up PPR and DSCR here so we don't run with user values for long - */ + /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 mtspr SPRN_PPR, r6 - /* Do final sanity check on TEXASR to make sure FS is set. Do this + /* + * Do final sanity check on TEXASR to make sure FS is set. Do this * here before we load up the userspace r1 so any bugs we hit will get - * a call chain */ + * a call chain. + */ mfspr r5, SPRN_TEXASR srdi r5, r5, 16 li r6, (TEXASR_FS)@h @@ -436,8 +445,9 @@ restore_gprs: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* Do final sanity check on MSR to make sure we are not transactional - * or suspended + /* + * Do final sanity check on MSR to make sure we are not transactional + * or suspended. */ mfmsr r6 li r5, (MSR_TS_MASK)@higher @@ -453,8 +463,8 @@ restore_gprs: REST_GPR(6, r7) /* - * Store r1 and r5 on the stack so that we can access them - * after we clear MSR RI. + * Store r1 and r5 on the stack so that we can access them after we + * clear MSR RI. */ REST_GPR(5, r7) @@ -484,7 +494,8 @@ restore_gprs: HMT_MEDIUM - /* Our transactional state has now changed. + /* + * Our transactional state has now changed. * * Now just get out of here. Transactional (current) state will be * updated once restore is called on the return path in the _switch-ed diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index d22d8bafb643..b1725ad3e13d 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -3,11 +3,9 @@ # Makefile for the powerpc trace subsystem # -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror - ifdef CONFIG_FUNCTION_TRACER # do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) endif obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4bfbb54dee51..4bf051d3e21e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -30,6 +30,16 @@ #ifdef CONFIG_DYNAMIC_FTRACE + +/* + * We generally only have a single long_branch tramp and at most 2 or 3 plt + * tramps generated. But, we don't use the plt tramps currently. We also allot + * 2 tramps after .text and .init.text. So, we only end up with around 3 usable + * tramps in total. Set aside 8 just to be sure. + */ +#define NUM_FTRACE_TRAMPS 8 +static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; + static unsigned int ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { @@ -85,13 +95,16 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr) return create_branch((unsigned int *)ip, addr, 0); } -#ifdef CONFIG_MODULES - static int is_bl_op(unsigned int op) { return (op & 0xfc000003) == 0x48000001; } +static int is_b_op(unsigned int op) +{ + return (op & 0xfc000003) == 0x48000000; +} + static unsigned long find_bl_target(unsigned long ip, unsigned int op) { static int offset; @@ -104,6 +117,7 @@ static unsigned long find_bl_target(unsigned long ip, unsigned int op) return ip + (long)offset; } +#ifdef CONFIG_MODULES #ifdef CONFIG_PPC64 static int __ftrace_make_nop(struct module *mod, @@ -270,6 +284,146 @@ __ftrace_make_nop(struct module *mod, #endif /* PPC64 */ #endif /* CONFIG_MODULES */ +static unsigned long find_ftrace_tramp(unsigned long ip) +{ + int i; + + /* + * We have the compiler generated long_branch tramps at the end + * and we prefer those + */ + for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + if (!ftrace_tramps[i]) + continue; + else if (create_branch((void *)ip, ftrace_tramps[i], 0)) + return ftrace_tramps[i]; + + return 0; +} + +static int add_ftrace_tramp(unsigned long tramp) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return 0; + } + + return -1; +} + +/* + * If this is a compiler generated long_branch trampoline (essentially, a + * trampoline that has a branch to _mcount()), we re-write the branch to + * instead go to ftrace_[regs_]caller() and note down the location of this + * trampoline. + */ +static int setup_mcount_compiler_tramp(unsigned long tramp) +{ + int i, op; + unsigned long ptr; + static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; + + /* Is this a known long jump tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) + break; + else if (ftrace_tramps[i] == tramp) + return 0; + + /* Is this a known plt tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_plt_tramps[i]) + break; + else if (ftrace_plt_tramps[i] == tramp) + return -1; + + /* New trampoline -- read where this goes */ + if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) { + pr_debug("Fetching opcode failed.\n"); + return -1; + } + + /* Is this a 24 bit branch? */ + if (!is_b_op(op)) { + pr_debug("Trampoline is not a long branch tramp.\n"); + return -1; + } + + /* lets find where the pointer goes */ + ptr = find_bl_target(tramp, op); + + if (ptr != ppc_global_function_entry((void *)_mcount)) { + pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); + return -1; + } + + /* Let's re-write the tramp to go to ftrace_[regs_]caller */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + ptr = ppc_global_function_entry((void *)ftrace_regs_caller); +#else + ptr = ppc_global_function_entry((void *)ftrace_caller); +#endif + if (!create_branch((void *)tramp, ptr, 0)) { + pr_debug("%ps is not reachable from existing mcount tramp\n", + (void *)ptr); + return -1; + } + + if (patch_branch((unsigned int *)tramp, ptr, 0)) { + pr_debug("REL24 out of range!\n"); + return -1; + } + + if (add_ftrace_tramp(tramp)) { + pr_debug("No tramp locations left\n"); + return -1; + } + + return 0; +} + +static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long tramp, ip = rec->ip; + unsigned int op; + + /* Read where this goes */ + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %x\n", op); + return -EINVAL; + } + + /* Let's find where the pointer goes */ + tramp = find_bl_target(ip, op); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (setup_mcount_compiler_tramp(tramp)) { + /* Are other trampolines reachable? */ + if (!find_ftrace_tramp(ip)) { + pr_err("No ftrace trampolines reachable from %ps\n", + (void *)ip); + return -EINVAL; + } + } + + if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + + return 0; +} + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { @@ -286,7 +440,8 @@ int ftrace_make_nop(struct module *mod, old = ftrace_call_replace(ip, addr, 1); new = PPC_INST_NOP; return ftrace_modify_code(ip, old, new); - } + } else if (core_kernel_text(ip)) + return __ftrace_make_nop_kernel(rec, addr); #ifdef CONFIG_MODULES /* @@ -456,6 +611,53 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) #endif /* CONFIG_PPC64 */ #endif /* CONFIG_MODULES */ +static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op; + void *ip = (void *)rec->ip; + unsigned long tramp, entry, ptr; + + /* Make sure we're being asked to patch branch to a known ftrace addr */ + entry = ppc_global_function_entry((void *)ftrace_caller); + ptr = ppc_global_function_entry((void *)addr); + + if (ptr != entry) { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + entry = ppc_global_function_entry((void *)ftrace_regs_caller); + if (ptr != entry) { +#endif + pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); + return -EINVAL; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + } +#endif + } + + /* Make sure we have a nop */ + if (probe_kernel_read(&op, ip, sizeof(op))) { + pr_err("Unable to read ftrace location %p\n", ip); + return -EFAULT; + } + + if (op != PPC_INST_NOP) { + pr_err("Unexpected call sequence at %p: %x\n", ip, op); + return -EINVAL; + } + + tramp = find_ftrace_tramp((unsigned long)ip); + if (!tramp) { + pr_err("No ftrace trampolines reachable from %ps\n", ip); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("Error patching branch to ftrace tramp!\n"); + return -EINVAL; + } + + return 0; +} + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; @@ -471,7 +673,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) old = PPC_INST_NOP; new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); - } + } else if (core_kernel_text(ip)) + return __ftrace_make_call_kernel(rec, addr); #ifdef CONFIG_MODULES /* @@ -603,6 +806,12 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, old = ftrace_call_replace(ip, old_addr, 1); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + /* + * We always patch out of range locations to go to the regs + * variant, so there is nothing to do here + */ + return 0; } #ifdef CONFIG_MODULES @@ -654,10 +863,54 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); } +#ifdef CONFIG_PPC64 +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI ((v) + 0x8000) + +extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; + +int __init ftrace_dyn_arch_init(void) +{ + int i; + unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; + u32 stub_insns[] = { + 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */ + 0x3d8c0000, /* addis r12,r12,<high> */ + 0x398c0000, /* addi r12,r12,<low> */ + 0x7d8903a6, /* mtctr r12 */ + 0x4e800420, /* bctr */ + }; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller); +#else + unsigned long addr = ppc_global_function_entry((void *)ftrace_caller); +#endif + long reladdr = addr - kernel_toc_addr(); + + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("Address of %ps out of range of kernel_toc.\n", + (void *)addr); + return -1; + } + + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + + return 0; +} +#else int __init ftrace_dyn_arch_init(void) { return 0; } +#endif #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S index e25f77c10a72..1782af2d1496 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_64.S @@ -14,6 +14,18 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> +.pushsection ".tramp.ftrace.text","aw",@progbits; +.globl ftrace_tramp_text +ftrace_tramp_text: + .space 64 +.popsection + +.pushsection ".tramp.ftrace.init","aw",@progbits; +.globl ftrace_tramp_init +ftrace_tramp_init: + .space 64 +.popsection + _GLOBAL(mcount) _GLOBAL(_mcount) EXPORT_SYMBOL(_mcount) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index ab1bd06d7c44..9a86572db1ef 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -247,8 +247,6 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, mdelay(MSEC_PER_SEC); } - if (in_interrupt()) - panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); do_exit(signr); @@ -535,10 +533,10 @@ int machine_check_e500mc(struct pt_regs *regs) printk("Caused by (from MCSR=%lx): ", reason); if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); + pr_cont("Machine Check Signal\n"); if (reason & MCSR_ICPERR) { - printk("Instruction Cache Parity Error\n"); + pr_cont("Instruction Cache Parity Error\n"); /* * This is recoverable by invalidating the i-cache. @@ -556,7 +554,7 @@ int machine_check_e500mc(struct pt_regs *regs) } if (reason & MCSR_DCPERR_MC) { - printk("Data Cache Parity Error\n"); + pr_cont("Data Cache Parity Error\n"); /* * In write shadow mode we auto-recover from the error, but it @@ -575,38 +573,38 @@ int machine_check_e500mc(struct pt_regs *regs) } if (reason & MCSR_L2MMU_MHIT) { - printk("Hit on multiple TLB entries\n"); + pr_cont("Hit on multiple TLB entries\n"); recoverable = 0; } if (reason & MCSR_NMI) - printk("Non-maskable interrupt\n"); + pr_cont("Non-maskable interrupt\n"); if (reason & MCSR_IF) { - printk("Instruction Fetch Error Report\n"); + pr_cont("Instruction Fetch Error Report\n"); recoverable = 0; } if (reason & MCSR_LD) { - printk("Load Error Report\n"); + pr_cont("Load Error Report\n"); recoverable = 0; } if (reason & MCSR_ST) { - printk("Store Error Report\n"); + pr_cont("Store Error Report\n"); recoverable = 0; } if (reason & MCSR_LDG) { - printk("Guarded Load Error Report\n"); + pr_cont("Guarded Load Error Report\n"); recoverable = 0; } if (reason & MCSR_TLBSYNC) - printk("Simultaneous tlbsync operations\n"); + pr_cont("Simultaneous tlbsync operations\n"); if (reason & MCSR_BSL2_ERR) { - printk("Level 2 Cache Error\n"); + pr_cont("Level 2 Cache Error\n"); recoverable = 0; } @@ -616,7 +614,7 @@ int machine_check_e500mc(struct pt_regs *regs) addr = mfspr(SPRN_MCAR); addr |= (u64)mfspr(SPRN_MCARU) << 32; - printk("Machine Check %s Address: %#llx\n", + pr_cont("Machine Check %s Address: %#llx\n", reason & MCSR_MEA ? "Effective" : "Physical", addr); } @@ -640,29 +638,29 @@ int machine_check_e500(struct pt_regs *regs) printk("Caused by (from MCSR=%lx): ", reason); if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); + pr_cont("Machine Check Signal\n"); if (reason & MCSR_ICPERR) - printk("Instruction Cache Parity Error\n"); + pr_cont("Instruction Cache Parity Error\n"); if (reason & MCSR_DCP_PERR) - printk("Data Cache Push Parity Error\n"); + pr_cont("Data Cache Push Parity Error\n"); if (reason & MCSR_DCPERR) - printk("Data Cache Parity Error\n"); + pr_cont("Data Cache Parity Error\n"); if (reason & MCSR_BUS_IAERR) - printk("Bus - Instruction Address Error\n"); + pr_cont("Bus - Instruction Address Error\n"); if (reason & MCSR_BUS_RAERR) - printk("Bus - Read Address Error\n"); + pr_cont("Bus - Read Address Error\n"); if (reason & MCSR_BUS_WAERR) - printk("Bus - Write Address Error\n"); + pr_cont("Bus - Write Address Error\n"); if (reason & MCSR_BUS_IBERR) - printk("Bus - Instruction Data Error\n"); + pr_cont("Bus - Instruction Data Error\n"); if (reason & MCSR_BUS_RBERR) - printk("Bus - Read Data Bus Error\n"); + pr_cont("Bus - Read Data Bus Error\n"); if (reason & MCSR_BUS_WBERR) - printk("Bus - Write Data Bus Error\n"); + pr_cont("Bus - Write Data Bus Error\n"); if (reason & MCSR_BUS_IPERR) - printk("Bus - Instruction Parity Error\n"); + pr_cont("Bus - Instruction Parity Error\n"); if (reason & MCSR_BUS_RPERR) - printk("Bus - Read Parity Error\n"); + pr_cont("Bus - Read Parity Error\n"); return 0; } @@ -680,19 +678,19 @@ int machine_check_e200(struct pt_regs *regs) printk("Caused by (from MCSR=%lx): ", reason); if (reason & MCSR_MCP) - printk("Machine Check Signal\n"); + pr_cont("Machine Check Signal\n"); if (reason & MCSR_CP_PERR) - printk("Cache Push Parity Error\n"); + pr_cont("Cache Push Parity Error\n"); if (reason & MCSR_CPERR) - printk("Cache Parity Error\n"); + pr_cont("Cache Parity Error\n"); if (reason & MCSR_EXCP_ERR) - printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); if (reason & MCSR_BUS_IRERR) - printk("Bus - Read Bus Error on instruction fetch\n"); + pr_cont("Bus - Read Bus Error on instruction fetch\n"); if (reason & MCSR_BUS_DRERR) - printk("Bus - Read Bus Error on data load\n"); + pr_cont("Bus - Read Bus Error on data load\n"); if (reason & MCSR_BUS_WRERR) - printk("Bus - Write Bus Error on buffered store or cache line push\n"); + pr_cont("Bus - Write Bus Error on buffered store or cache line push\n"); return 0; } @@ -705,30 +703,30 @@ int machine_check_generic(struct pt_regs *regs) printk("Caused by (from SRR1=%lx): ", reason); switch (reason & 0x601F0000) { case 0x80000: - printk("Machine check signal\n"); + pr_cont("Machine check signal\n"); break; case 0: /* for 601 */ case 0x40000: case 0x140000: /* 7450 MSS error and TEA */ - printk("Transfer error ack signal\n"); + pr_cont("Transfer error ack signal\n"); break; case 0x20000: - printk("Data parity error signal\n"); + pr_cont("Data parity error signal\n"); break; case 0x10000: - printk("Address parity error signal\n"); + pr_cont("Address parity error signal\n"); break; case 0x20000000: - printk("L1 Data Cache error\n"); + pr_cont("L1 Data Cache error\n"); break; case 0x40000000: - printk("L1 Instruction Cache error\n"); + pr_cont("L1 Instruction Cache error\n"); break; case 0x00100000: - printk("L2 data cache parity error\n"); + pr_cont("L2 data cache parity error\n"); break; default: - printk("Unknown values in msr\n"); + pr_cont("Unknown values in msr\n"); } return 0; } @@ -741,9 +739,7 @@ void machine_check_exception(struct pt_regs *regs) if (!nested) nmi_enter(); - /* 64s accounts the mce in machine_check_early when in HVMODE */ - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE)) - __this_cpu_inc(irq_stat.mce_exceptions); + __this_cpu_inc(irq_stat.mce_exceptions); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); @@ -767,12 +763,17 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - die("Machine check", regs, SIGBUS); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) nmi_panic(regs, "Unrecoverable Machine check"); + if (!nested) + nmi_exit(); + + die("Machine check", regs, SIGBUS); + + return; + bail: if (!nested) nmi_exit(); @@ -1433,7 +1434,7 @@ void program_check_exception(struct pt_regs *regs) goto bail; } else { printk(KERN_EMERG "Unexpected TM Bad Thing exception " - "at %lx (msr 0x%x)\n", regs->nip, reason); + "at %lx (msr 0x%lx)\n", regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); } } @@ -1547,14 +1548,6 @@ void StackOverflow(struct pt_regs *regs) panic("kernel stack overflow"); } -void nonrecoverable_exception(struct pt_regs *regs) -{ - printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", - regs->nip, regs->msr); - debugger(regs); - die("nonrecoverable exception", regs, SIGKILL); -} - void kernel_fp_unavailable_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); @@ -1750,16 +1743,20 @@ void fp_unavailable_tm(struct pt_regs *regs) * checkpointed FP registers need to be loaded. */ tm_reclaim_current(TM_CAUSE_FAC_UNAV); - /* Reclaim didn't save out any FPRs to transact_fprs. */ + + /* + * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and + * then it was overwrite by the thr->fp_state by tm_reclaim_thread(). + * + * At this point, ck{fp,vr}_state contains the exact values we want to + * recheckpoint. + */ /* Enable FP for the task: */ current->thread.load_fp = 1; - /* This loads and recheckpoints the FP registers from - * thread.fpr[]. They will remain in registers after the - * checkpoint so we don't need to reload them after. - * If VMX is in use, the VRs now hold checkpointed values, - * so we don't want to load the VRs from the thread_struct. + /* + * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers. */ tm_recheckpoint(¤t->thread); } @@ -2086,8 +2083,8 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) */ void unrecoverable_exception(struct pt_regs *regs) { - printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", - regs->trap, regs->nip); + pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", + regs->trap, regs->nip, regs->msr); die("Unrecoverable exception", regs, SIGABRT); } NOKPROBE_SYMBOL(unrecoverable_exception); diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 3745113fcc65..2a7eb5452aba 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -37,6 +37,7 @@ data_page_branch: mtlr r0 addi r3, r3, __kernel_datapage_offset-data_page_branch lwz r0,0(r3) + .cfi_restore lr add r3,r0,r3 blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 769c2624e0a6..1e0bc5955a40 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -139,6 +139,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) */ 99: li r0,__NR_clock_gettime + .cfi_restore lr sc blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index abf17feffe40..bf9668691511 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -37,6 +37,7 @@ data_page_branch: mtlr r0 addi r3, r3, __kernel_datapage_offset-data_page_branch lwz r0,0(r3) + .cfi_restore lr add r3,r0,r3 blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index c002adcc694c..a4ed9edfd5f0 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -169,6 +169,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) */ 99: li r0,__NR_clock_gettime + .cfi_restore lr sc blr .cfi_endproc diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 105a976323aa..434581bcd5b4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -4,6 +4,9 @@ #else #define PROVIDE32(x) PROVIDE(x) #endif + +#define BSS_FIRST_SECTIONS *(.bss.prominit) + #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> @@ -99,6 +102,9 @@ SECTIONS #endif /* careful! __ftr_alt_* sections need to be close to .text */ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); +#ifdef CONFIG_PPC64 + *(.tramp.ftrace.text); +#endif SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT @@ -181,7 +187,15 @@ SECTIONS */ . = ALIGN(STRICT_ALIGN_SIZE); __init_begin = .; - INIT_TEXT_SECTION(PAGE_SIZE) :kernel + . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT + _einittext = .; +#ifdef CONFIG_PPC64 + *(.tramp.ftrace.init); +#endif + } :kernel /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index e814f40ab836..64f1135e7732 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -3,8 +3,6 @@ # Makefile for Kernel-based Virtual Machine module # -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror - ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm KVM := ../../../virt/kvm diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 670286808928..3bf9fc6fd36c 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -3,8 +3,6 @@ # Makefile for ppc-specific library files.. # -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror - ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) @@ -14,6 +12,8 @@ obj-y += string.o alloc.o code-patching.o feature-fixups.o obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o + # See corresponding test in arch/powerpc/Makefile # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 5ffee298745f..89502cbccb1b 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -98,8 +98,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr) else pfn = __pa_symbol(addr) >> PAGE_SHIFT; - err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), - pgprot_val(PAGE_KERNEL)); + err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); if (err) diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c new file mode 100644 index 000000000000..407b992fb02f --- /dev/null +++ b/arch/powerpc/lib/error-inject.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/error-injection.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * Emulate 'blr'. 'regs' represents the state on entry of a predefined + * function in the kernel/module, captured on a kprobe. We don't need + * to worry about 32-bit userspace on a 64-bit kernel. + */ + regs->nip = regs->link; +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index ec531de99996..3c3be02f33b7 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -40,7 +40,7 @@ _GLOBAL(memset) .Lms: PPC_MTOCRF(1,r0) mr r6,r3 blt cr1,8f - beq+ 3f /* if already 8-byte aligned */ + beq 3f /* if already 8-byte aligned */ subf r5,r0,r5 bf 31,1f stb r4,0(r6) @@ -85,7 +85,7 @@ _GLOBAL(memset) addi r6,r6,8 8: cmpwi r5,0 PPC_MTOCRF(1,r5) - beqlr+ + beqlr bf 29,9f stw r4,0(r6) addi r6,r6,4 diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index cf77d755246d..36484a2ef915 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -67,7 +67,7 @@ void __init MMU_init_hw(void) /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ #ifdef CONFIG_PIN_TLB_DATA unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; - unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY; #ifdef CONFIG_PIN_TLB_IMMR int i = 29; #else @@ -91,11 +91,10 @@ static void __init mmu_mapin_immr(void) { unsigned long p = PHYS_IMMR_BASE; unsigned long v = VIRT_IMMR_BASE; - unsigned long f = pgprot_val(PAGE_KERNEL_NCG); int offset; for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) - map_kernel_page(v + offset, p + offset, f); + map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG); } /* Address of instructions to patch */ diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index cdf6a9960046..ca96e7be4d0e 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -3,10 +3,10 @@ # Makefile for the linux ppc-specific parts of the memory manager. # -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror - ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) + obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ init-common.o mmu_context.o drmem.o @@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o +obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o @@ -43,5 +43,12 @@ obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o +ifdef CONFIG_PPC_PTDUMP +obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o +obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o +obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o +obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o +obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o +endif obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 382528475433..b6e7b5952ab5 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -228,7 +228,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t do { SetPageReserved(page); map_kernel_page(vaddr, page_to_phys(page), - pgprot_val(pgprot_noncached(PAGE_KERNEL))); + pgprot_noncached(PAGE_KERNEL)); page++; vaddr += PAGE_SIZE; } while (size -= PAGE_SIZE); diff --git a/arch/powerpc/mm/dump_linuxpagetables-8xx.c b/arch/powerpc/mm/dump_linuxpagetables-8xx.c new file mode 100644 index 000000000000..ab9e3f24db2f --- /dev/null +++ b/arch/powerpc/mm/dump_linuxpagetables-8xx.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * From split of dump_linuxpagetables.c + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + */ +#include <linux/kernel.h> +#include <asm/pgtable.h> + +#include "dump_linuxpagetables.h" + +static const struct flag_info flag_array[] = { + { + .mask = _PAGE_SH, + .val = 0, + .set = "user", + .clear = " ", + }, { + .mask = _PAGE_RO | _PAGE_NA, + .val = 0, + .set = "rw", + }, { + .mask = _PAGE_RO | _PAGE_NA, + .val = _PAGE_RO, + .set = "r ", + }, { + .mask = _PAGE_RO | _PAGE_NA, + .val = _PAGE_NA, + .set = " ", + }, { + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = " X ", + .clear = " ", + }, { + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "present", + .clear = " ", + }, { + .mask = _PAGE_GUARDED, + .val = _PAGE_GUARDED, + .set = "guarded", + .clear = " ", + }, { + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "dirty", + .clear = " ", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "accessed", + .clear = " ", + }, { + .mask = _PAGE_NO_CACHE, + .val = _PAGE_NO_CACHE, + .set = "no cache", + .clear = " ", + }, { + .mask = _PAGE_SPECIAL, + .val = _PAGE_SPECIAL, + .set = "special", + } +}; + +struct pgtable_level pg_level[5] = { + { + }, { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pud */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pmd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pte */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, +}; diff --git a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c new file mode 100644 index 000000000000..ed6fcf78256e --- /dev/null +++ b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * From split of dump_linuxpagetables.c + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + */ +#include <linux/kernel.h> +#include <asm/pgtable.h> + +#include "dump_linuxpagetables.h" + +static const struct flag_info flag_array[] = { + { + .mask = _PAGE_PRIVILEGED, + .val = 0, + .set = "user", + .clear = " ", + }, { + .mask = _PAGE_READ, + .val = _PAGE_READ, + .set = "r", + .clear = " ", + }, { + .mask = _PAGE_WRITE, + .val = _PAGE_WRITE, + .set = "w", + .clear = " ", + }, { + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = " X ", + .clear = " ", + }, { + .mask = _PAGE_PTE, + .val = _PAGE_PTE, + .set = "pte", + .clear = " ", + }, { + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "valid", + .clear = " ", + }, { + .mask = _PAGE_PRESENT | _PAGE_INVALID, + .val = 0, + .set = " ", + .clear = "present", + }, { + .mask = H_PAGE_HASHPTE, + .val = H_PAGE_HASHPTE, + .set = "hpte", + .clear = " ", + }, { + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "dirty", + .clear = " ", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "accessed", + .clear = " ", + }, { + .mask = _PAGE_NON_IDEMPOTENT, + .val = _PAGE_NON_IDEMPOTENT, + .set = "non-idempotent", + .clear = " ", + }, { + .mask = _PAGE_TOLERANT, + .val = _PAGE_TOLERANT, + .set = "tolerant", + .clear = " ", + }, { + .mask = H_PAGE_BUSY, + .val = H_PAGE_BUSY, + .set = "busy", + }, { +#ifdef CONFIG_PPC_64K_PAGES + .mask = H_PAGE_COMBO, + .val = H_PAGE_COMBO, + .set = "combo", + }, { + .mask = H_PAGE_4K_PFN, + .val = H_PAGE_4K_PFN, + .set = "4K_pfn", + }, { +#else /* CONFIG_PPC_64K_PAGES */ + .mask = H_PAGE_F_GIX, + .val = H_PAGE_F_GIX, + .set = "f_gix", + .is_val = true, + .shift = H_PAGE_F_GIX_SHIFT, + }, { + .mask = H_PAGE_F_SECOND, + .val = H_PAGE_F_SECOND, + .set = "f_second", + }, { +#endif /* CONFIG_PPC_64K_PAGES */ + .mask = _PAGE_SPECIAL, + .val = _PAGE_SPECIAL, + .set = "special", + } +}; + +struct pgtable_level pg_level[5] = { + { + }, { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pud */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pmd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pte */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, +}; diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/dump_linuxpagetables-generic.c new file mode 100644 index 000000000000..1e3829ec1348 --- /dev/null +++ b/arch/powerpc/mm/dump_linuxpagetables-generic.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * From split of dump_linuxpagetables.c + * Copyright 2016, Rashmica Gupta, IBM Corp. + * + */ +#include <linux/kernel.h> +#include <asm/pgtable.h> + +#include "dump_linuxpagetables.h" + +static const struct flag_info flag_array[] = { + { + .mask = _PAGE_USER, + .val = _PAGE_USER, + .set = "user", + .clear = " ", + }, { + .mask = _PAGE_RW, + .val = _PAGE_RW, + .set = "rw", + .clear = "r ", + }, { +#ifndef CONFIG_PPC_BOOK3S_32 + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = " X ", + .clear = " ", + }, { +#endif + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "present", + .clear = " ", + }, { + .mask = _PAGE_GUARDED, + .val = _PAGE_GUARDED, + .set = "guarded", + .clear = " ", + }, { + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "dirty", + .clear = " ", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "accessed", + .clear = " ", + }, { + .mask = _PAGE_WRITETHRU, + .val = _PAGE_WRITETHRU, + .set = "write through", + .clear = " ", + }, { + .mask = _PAGE_NO_CACHE, + .val = _PAGE_NO_CACHE, + .set = "no cache", + .clear = " ", + }, { + .mask = _PAGE_SPECIAL, + .val = _PAGE_SPECIAL, + .set = "special", + } +}; + +struct pgtable_level pg_level[5] = { + { + }, { /* pgd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pud */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pmd */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, { /* pte */ + .flag = flag_array, + .num = ARRAY_SIZE(flag_array), + }, +}; diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 876e2a3c79f2..2b74f8adf4d0 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -27,6 +27,8 @@ #include <asm/page.h> #include <asm/pgalloc.h> +#include "dump_linuxpagetables.h" + #ifdef CONFIG_PPC32 #define KERN_VIRT_START 0 #endif @@ -101,159 +103,6 @@ static struct addr_marker address_markers[] = { { -1, NULL }, }; -struct flag_info { - u64 mask; - u64 val; - const char *set; - const char *clear; - bool is_val; - int shift; -}; - -static const struct flag_info flag_array[] = { - { - .mask = _PAGE_USER | _PAGE_PRIVILEGED, - .val = _PAGE_USER, - .set = "user", - .clear = " ", - }, { - .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, - .val = _PAGE_RW, - .set = "rw", - }, { - .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, - .val = _PAGE_RO, - .set = "ro", - }, { -#if _PAGE_NA != 0 - .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA, - .val = _PAGE_RO, - .set = "na", - }, { -#endif - .mask = _PAGE_EXEC, - .val = _PAGE_EXEC, - .set = " X ", - .clear = " ", - }, { - .mask = _PAGE_PTE, - .val = _PAGE_PTE, - .set = "pte", - .clear = " ", - }, { - .mask = _PAGE_PRESENT, - .val = _PAGE_PRESENT, - .set = "present", - .clear = " ", - }, { -#ifdef CONFIG_PPC_BOOK3S_64 - .mask = H_PAGE_HASHPTE, - .val = H_PAGE_HASHPTE, -#else - .mask = _PAGE_HASHPTE, - .val = _PAGE_HASHPTE, -#endif - .set = "hpte", - .clear = " ", - }, { -#ifndef CONFIG_PPC_BOOK3S_64 - .mask = _PAGE_GUARDED, - .val = _PAGE_GUARDED, - .set = "guarded", - .clear = " ", - }, { -#endif - .mask = _PAGE_DIRTY, - .val = _PAGE_DIRTY, - .set = "dirty", - .clear = " ", - }, { - .mask = _PAGE_ACCESSED, - .val = _PAGE_ACCESSED, - .set = "accessed", - .clear = " ", - }, { -#ifndef CONFIG_PPC_BOOK3S_64 - .mask = _PAGE_WRITETHRU, - .val = _PAGE_WRITETHRU, - .set = "write through", - .clear = " ", - }, { -#endif -#ifndef CONFIG_PPC_BOOK3S_64 - .mask = _PAGE_NO_CACHE, - .val = _PAGE_NO_CACHE, - .set = "no cache", - .clear = " ", - }, { -#else - .mask = _PAGE_NON_IDEMPOTENT, - .val = _PAGE_NON_IDEMPOTENT, - .set = "non-idempotent", - .clear = " ", - }, { - .mask = _PAGE_TOLERANT, - .val = _PAGE_TOLERANT, - .set = "tolerant", - .clear = " ", - }, { -#endif -#ifdef CONFIG_PPC_BOOK3S_64 - .mask = H_PAGE_BUSY, - .val = H_PAGE_BUSY, - .set = "busy", - }, { -#ifdef CONFIG_PPC_64K_PAGES - .mask = H_PAGE_COMBO, - .val = H_PAGE_COMBO, - .set = "combo", - }, { - .mask = H_PAGE_4K_PFN, - .val = H_PAGE_4K_PFN, - .set = "4K_pfn", - }, { -#else /* CONFIG_PPC_64K_PAGES */ - .mask = H_PAGE_F_GIX, - .val = H_PAGE_F_GIX, - .set = "f_gix", - .is_val = true, - .shift = H_PAGE_F_GIX_SHIFT, - }, { - .mask = H_PAGE_F_SECOND, - .val = H_PAGE_F_SECOND, - .set = "f_second", - }, { -#endif /* CONFIG_PPC_64K_PAGES */ -#endif - .mask = _PAGE_SPECIAL, - .val = _PAGE_SPECIAL, - .set = "special", - } -}; - -struct pgtable_level { - const struct flag_info *flag; - size_t num; - u64 mask; -}; - -static struct pgtable_level pg_level[] = { - { - }, { /* pgd */ - .flag = flag_array, - .num = ARRAY_SIZE(flag_array), - }, { /* pud */ - .flag = flag_array, - .num = ARRAY_SIZE(flag_array), - }, { /* pmd */ - .flag = flag_array, - .num = ARRAY_SIZE(flag_array), - }, { /* pte */ - .flag = flag_array, - .num = ARRAY_SIZE(flag_array), - }, -}; - static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -418,12 +267,13 @@ static void walk_pagetables(struct pg_state *st) unsigned int i; unsigned long addr; + addr = st->start_address; + /* * Traverse the linux pagetable structure and dump pages that are in * the hash pagetable. */ - for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { - addr = KERN_VIRT_START + i * PGDIR_SIZE; + for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { if (!pgd_none(*pgd) && !pgd_huge(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); @@ -472,9 +322,14 @@ static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { .seq = m, - .start_address = KERN_VIRT_START, .marker = address_markers, }; + + if (radix_enabled()) + st.start_address = PAGE_OFFSET; + else + st.start_address = KERN_VIRT_START; + /* Traverse kernel page tables */ walk_pagetables(&st); note_page(&st, 0, 0, 0); diff --git a/arch/powerpc/mm/dump_linuxpagetables.h b/arch/powerpc/mm/dump_linuxpagetables.h new file mode 100644 index 000000000000..5d513636de73 --- /dev/null +++ b/arch/powerpc/mm/dump_linuxpagetables.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/types.h> + +struct flag_info { + u64 mask; + u64 val; + const char *set; + const char *clear; + bool is_val; + int shift; +}; + +struct pgtable_level { + const struct flag_info *flag; + size_t num; + u64 mask; +}; + +extern struct pgtable_level pg_level[5]; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 729f02df8290..aaa28fd918fe 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -115,6 +115,8 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) tlbiel_hash_set_isa300(0, is, 0, 2, 1); asm volatile("ptesync": : :"memory"); + + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } void hash__tlbiel_all(unsigned int action) @@ -140,8 +142,6 @@ void hash__tlbiel_all(unsigned int action) tlbiel_all_isa206(POWER7_TLB_SETS, is); else WARN(1, "%s called on pre-POWER7 CPU\n", __func__); - - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } static inline unsigned long ___tlbie(unsigned long vpn, int psize, diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f23a89d8e4ce..0cc7fbc3bd1c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1001,9 +1001,9 @@ void __init hash__early_init_mmu(void) * 4k use hugepd format, so for hash set then to * zero */ - __pmd_val_bits = 0; - __pud_val_bits = 0; - __pgd_val_bits = 0; + __pmd_val_bits = HASH_PMD_VAL_BITS; + __pud_val_bits = HASH_PUD_VAL_BITS; + __pgd_val_bits = HASH_PGD_VAL_BITS; __kernel_virt_start = H_KERN_VIRT_START; __kernel_virt_size = H_KERN_VIRT_SIZE; @@ -1125,7 +1125,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { copy_mm_to_paca(mm); - slb_flush_and_rebolt(); + slb_flush_and_restore_bolted(); } } #endif /* CONFIG_PPC_64K_PAGES */ @@ -1197,7 +1197,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, if (user_region) { if (psize != get_paca_psize(ea)) { copy_mm_to_paca(mm); - slb_flush_and_rebolt(); + slb_flush_and_restore_bolted(); } } else if (get_paca()->vmalloc_sllp != mmu_psize_defs[mmu_vmalloc_psize].sllp) { @@ -1482,7 +1482,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) #endif void hash_preload(struct mm_struct *mm, unsigned long ea, - unsigned long access, unsigned long trap) + bool is_exec, unsigned long trap) { int hugepage_shift; unsigned long vsid; @@ -1490,6 +1490,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pte_t *ptep; unsigned long flags; int rc, ssize, update_flags = 0; + unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0); BUG_ON(REGION_ID(ea) != USER_REGION_ID); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 01f213d2bcb9..dfbc3b32f09b 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -51,6 +51,12 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, new_pmd |= _PAGE_DIRTY; } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd))); + /* + * Make sure this is thp or devmap entry + */ + if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))) + return 0; + rflags = htab_convert_pte_flags(new_pmd); #if 0 diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index b320f5097a06..2e6a8f9345d3 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -62,6 +62,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, new_pte |= _PAGE_DIRTY; } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); + /* Make sure this is a hugetlb entry */ + if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)) + return 0; + rflags = htab_convert_pte_flags(new_pte); if (unlikely(mmu_psize == MMU_PAGE_16G)) offset = PTRS_PER_PUD; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index e87f9ef9115b..a7226ed9cae6 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include <linux/moduleparam.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/kmemleak.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> @@ -95,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, break; else { #ifdef CONFIG_PPC_BOOK3S_64 - *hpdp = __hugepd(__pa(new) | + *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2)); #elif defined(CONFIG_PPC_8xx) *hpdp = __hugepd(__pa(new) | _PMD_USER | @@ -112,6 +113,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); kmem_cache_free(cachep, new); + } else { + kmemleak_ignore(new); } spin_unlock(ptl); return 0; @@ -837,8 +840,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, ret_pte = (pte_t *) pmdp; goto out; } - - if (pmd_huge(pmd)) { + /* + * pmd_large check below will handle the swap pmd pte + * we need to do both the check because they are config + * dependent. + */ + if (pmd_huge(pmd) || pmd_large(pmd)) { ret_pte = (pte_t *) pmdp; goto out; } else if (is_hugepd(__hugepd(pmd_val(pmd)))) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 04ccb274a620..dd949d6649a2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -309,11 +309,11 @@ void __init paging_init(void) unsigned long end = __fix_to_virt(FIX_HOLE); for (; v < end; v += PAGE_SIZE) - map_kernel_page(v, 0, 0); /* XXX gross */ + map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */ #endif #ifdef CONFIG_HIGHMEM - map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */ + map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */ pkmap_page_table = virt_to_kpte(PKMAP_BASE); kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); @@ -509,7 +509,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * We don't need to worry about _PAGE_PRESENT here because we are * called with either mm->page_table_lock held or ptl lock held */ - unsigned long access, trap; + unsigned long trap; + bool is_exec; if (radix_enabled()) { prefetch((void *)address); @@ -531,16 +532,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL; switch (trap) { case 0x300: - access = 0UL; + is_exec = false; break; case 0x400: - access = _PAGE_EXEC; + is_exec = true; break; default: return; } - hash_preload(vma->vm_mm, address, access, trap); + hash_preload(vma->vm_mm, address, is_exec, trap); #endif /* CONFIG_PPC_STD_MMU */ #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ && defined(CONFIG_HUGETLB_PAGE) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index dbd8f762140b..510f103d7813 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -53,6 +53,8 @@ int hash__alloc_context_id(void) } EXPORT_SYMBOL_GPL(hash__alloc_context_id); +void slb_setup_new_exec(void); + static int hash__init_new_context(struct mm_struct *mm) { int index; @@ -84,6 +86,13 @@ static int hash__init_new_context(struct mm_struct *mm) return index; } +void hash__setup_new_exec(void) +{ + slice_setup_new_exec(); + + slb_setup_new_exec(); +} + static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index e5d779eed181..8574fbbc45e0 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -22,6 +22,7 @@ #include <asm/mmu.h> #ifdef CONFIG_PPC_MMU_NOHASH +#include <asm/trace.h> /* * On 40x and 8xx, we directly inline tlbia and tlbivax @@ -30,10 +31,12 @@ static inline void _tlbil_all(void) { asm volatile ("sync; tlbia; isync" : : : "memory"); + trace_tlbia(MMU_NO_CONTEXT); } static inline void _tlbil_pid(unsigned int pid) { asm volatile ("sync; tlbia; isync" : : : "memory"); + trace_tlbia(pid); } #define _tlbil_pid_noind(pid) _tlbil_pid(pid) @@ -55,6 +58,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind) { asm volatile ("tlbie %0; sync" : : "r" (address) : "memory"); + trace_tlbie(0, 0, address, pid, 0, 0, 0); } #elif defined(CONFIG_PPC_BOOK3E) extern void _tlbil_va(unsigned long address, unsigned int pid, @@ -82,7 +86,7 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid, #else /* CONFIG_PPC_MMU_NOHASH */ extern void hash_preload(struct mm_struct *mm, unsigned long ea, - unsigned long access, unsigned long trap); + bool is_exec, unsigned long trap); extern void _tlbie(unsigned long address); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 055b211b7126..693ae1c1acba 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1521,6 +1521,10 @@ int start_topology_update(void) } } + pr_info("Starting topology update%s%s\n", + (prrn_enabled ? " prrn_enabled" : ""), + (vphn_enabled ? " vphn_enabled" : "")); + return rc; } @@ -1542,6 +1546,8 @@ int stop_topology_update(void) rc = del_timer_sync(&topology_timer); } + pr_info("Stopping topology update\n"); + return rc; } diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c index a2298930f990..e0ccf36714b2 100644 --- a/arch/powerpc/mm/pgtable-book3e.c +++ b/arch/powerpc/mm/pgtable-book3e.c @@ -42,7 +42,7 @@ int __meminit vmemmap_create_mapping(unsigned long start, * thus must have the low bits clear */ for (i = 0; i < page_size; i += PAGE_SIZE) - BUG_ON(map_kernel_page(start + i, phys, flags)); + BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags))); return 0; } @@ -70,7 +70,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) +int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; pud_t *pudp; @@ -89,8 +89,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) ptep = pte_alloc_kernel(pmdp, ea); if (!ptep) return -ENOMEM; - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); } else { pgdp = pgd_offset_k(ea); #ifndef __PAGETABLE_PUD_FOLDED @@ -113,9 +111,8 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, ea); - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); } + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); smp_wmb(); return 0; diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 01d7c0f7c4f0..9f93c9f985c5 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -69,9 +69,14 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { #ifdef CONFIG_DEBUG_VM - WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); + /* + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. + */ + + WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); assert_spin_locked(pmd_lockptr(mm, pmdp)); - WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd))); + WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd))); #endif trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); @@ -106,7 +111,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, { unsigned long old_pmd; - old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); + old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); /* * This ensures that generic code that rely on IRQ disabling diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 692bfc9e372c..c08d49046a96 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -142,7 +142,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) +int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; pud_t *pudp; @@ -161,8 +161,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag ptep = pte_alloc_kernel(pmdp, ea); if (!ptep) return -ENOMEM; - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot)); } else { /* * If the mm subsystem is not fully up, we cannot create a @@ -170,7 +169,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag * entry in the hardware page table. * */ - if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, + if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot), mmu_io_psize, mmu_kernel_ssize)) { printk(KERN_ERR "Failed to do bolted mapping IO " "memory at %016lx !\n", pa); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index c879979faa73..931156069a81 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -241,9 +241,8 @@ void radix__mark_initmem_nx(void) } #endif /* CONFIG_STRICT_KERNEL_RWX */ -static inline void __meminit print_mapping(unsigned long start, - unsigned long end, - unsigned long size) +static inline void __meminit +print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec) { char buf[10]; @@ -252,7 +251,17 @@ static inline void __meminit print_mapping(unsigned long start, string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf)); - pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf); + pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf, + exec ? " (exec)" : ""); +} + +static unsigned long next_boundary(unsigned long addr, unsigned long end) +{ +#ifdef CONFIG_STRICT_KERNEL_RWX + if (addr < __pa_symbol(__init_begin)) + return __pa_symbol(__init_begin); +#endif + return end; } static int __meminit create_physical_mapping(unsigned long start, @@ -260,13 +269,8 @@ static int __meminit create_physical_mapping(unsigned long start, int nid) { unsigned long vaddr, addr, mapping_size = 0; + bool prev_exec, exec = false; pgprot_t prot; - unsigned long max_mapping_size; -#ifdef CONFIG_STRICT_KERNEL_RWX - int split_text_mapping = 1; -#else - int split_text_mapping = 0; -#endif int psize; start = _ALIGN_UP(start, PAGE_SIZE); @@ -274,14 +278,12 @@ static int __meminit create_physical_mapping(unsigned long start, unsigned long gap, previous_size; int rc; - gap = end - addr; + gap = next_boundary(addr, end) - addr; previous_size = mapping_size; - max_mapping_size = PUD_SIZE; + prev_exec = exec; -retry: if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && - mmu_psize_defs[MMU_PAGE_1G].shift && - PUD_SIZE <= max_mapping_size) { + mmu_psize_defs[MMU_PAGE_1G].shift) { mapping_size = PUD_SIZE; psize = MMU_PAGE_1G; } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && @@ -293,32 +295,21 @@ retry: psize = mmu_virtual_psize; } - if (split_text_mapping && (mapping_size == PUD_SIZE) && - (addr <= __pa_symbol(__init_begin)) && - (addr + mapping_size) >= __pa_symbol(_stext)) { - max_mapping_size = PMD_SIZE; - goto retry; - } - - if (split_text_mapping && (mapping_size == PMD_SIZE) && - (addr <= __pa_symbol(__init_begin)) && - (addr + mapping_size) >= __pa_symbol(_stext)) { - mapping_size = PAGE_SIZE; - psize = mmu_virtual_psize; - } - - if (mapping_size != previous_size) { - print_mapping(start, addr, previous_size); - start = addr; - } - vaddr = (unsigned long)__va(addr); if (overlaps_kernel_text(vaddr, vaddr + mapping_size) || - overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) + overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) { prot = PAGE_KERNEL_X; - else + exec = true; + } else { prot = PAGE_KERNEL; + exec = false; + } + + if (mapping_size != previous_size || exec != prev_exec) { + print_mapping(start, addr, previous_size, prev_exec); + start = addr; + } rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); if (rc) @@ -327,7 +318,7 @@ retry: update_page_count(psize, 1); } - print_mapping(start, addr, mapping_size); + print_mapping(start, addr, mapping_size, exec); return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index d71c7777669c..010e1c616cb2 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -44,20 +44,13 @@ static inline int is_exec_fault(void) static inline int pte_looks_normal(pte_t pte) { -#if defined(CONFIG_PPC_BOOK3S_64) - if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) { + if (pte_present(pte) && !pte_special(pte)) { if (pte_ci(pte)) return 0; if (pte_user(pte)) return 1; } return 0; -#else - return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER | - _PAGE_PRIVILEGED)) == - (_PAGE_PRESENT | _PAGE_USER); -#endif } static struct page *maybe_pte_to_page(pte_t pte) @@ -73,7 +66,7 @@ static struct page *maybe_pte_to_page(pte_t pte) return page; } -#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 +#ifdef CONFIG_PPC_BOOK3S /* Server-style MMU handles coherency when hashing if HW exec permission * is supposed per page (currently 64-bit only). If not, then, we always @@ -106,7 +99,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, return pte; } -#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */ +#else /* CONFIG_PPC_BOOK3S */ /* Embedded type MMU with HW exec support. This is a bit more complicated * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so @@ -117,7 +110,7 @@ static pte_t set_pte_filter(pte_t pte) struct page *pg; /* No exec permission in the first place, move on */ - if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte)) + if (!pte_exec(pte) || !pte_looks_normal(pte)) return pte; /* If you set _PAGE_EXEC on weird pages you're on your own */ @@ -137,7 +130,7 @@ static pte_t set_pte_filter(pte_t pte) } /* Else, we filter out _PAGE_EXEC */ - return __pte(pte_val(pte) & ~_PAGE_EXEC); + return pte_exprotect(pte); } static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, @@ -150,7 +143,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, * if necessary. Also if _PAGE_EXEC is already set, same deal, * we just bail out */ - if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault()) + if (dirty || pte_exec(pte) || !is_exec_fault()) return pte; #ifdef CONFIG_DEBUG_VM @@ -176,10 +169,10 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, set_bit(PG_arch_1, &pg->flags); bail: - return __pte(pte_val(pte) | _PAGE_EXEC); + return pte_mkexec(pte); } -#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */ +#endif /* CONFIG_PPC_BOOK3S */ /* * set_pte stores a linux PTE into the linux page table. @@ -188,14 +181,13 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { /* - * When handling numa faults, we already have the pte marked - * _PAGE_PRESENT, but we can be sure that it is not in hpte. - * Hence we can use set_pte_at for them. + * Make sure hardware valid bit is not set. We don't do + * tlb flush for this update. */ - VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep)); + VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); /* Add the pte bit when trying to set a pte */ - pte = __pte(pte_val(pte) | _PAGE_PTE); + pte = pte_mkpte(pte); /* Note: mm->context.id might not yet have been assigned as * this context might not have been activated yet when this diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 120a49bfb9c6..5877f5aa8f5d 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -76,56 +76,69 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) void __iomem * ioremap(phys_addr_t addr, unsigned long size) { - return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED, - __builtin_return_address(0)); + pgprot_t prot = pgprot_noncached(PAGE_KERNEL); + + return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap); void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) { - return __ioremap_caller(addr, size, _PAGE_NO_CACHE, - __builtin_return_address(0)); + pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL); + + return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_wc); void __iomem * +ioremap_wt(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL); + + return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wt); + +void __iomem * +ioremap_coherent(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_cached(PAGE_KERNEL); + + return __ioremap_caller(addr, size, prot, __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_coherent); + +void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { + pte_t pte = __pte(flags); + /* writeable implies dirty for kernel addresses */ - if ((flags & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO) - flags |= _PAGE_DIRTY | _PAGE_HWWRITE; + if (pte_write(pte)) + pte = pte_mkdirty(pte); /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ - flags &= ~(_PAGE_USER | _PAGE_EXEC); - flags |= _PAGE_PRIVILEGED; + pte = pte_exprotect(pte); + pte = pte_mkprivileged(pte); - return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); + return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0)); } EXPORT_SYMBOL(ioremap_prot); void __iomem * __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) { - return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); + return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0)); } void __iomem * -__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, - void *caller) +__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { unsigned long v, i; phys_addr_t p; int err; - /* Make sure we have the base flags */ - if ((flags & _PAGE_PRESENT) == 0) - flags |= pgprot_val(PAGE_KERNEL); - - /* Non-cacheable page cannot be coherent */ - if (flags & _PAGE_NO_CACHE) - flags &= ~_PAGE_COHERENT; - /* * Choose an address to map it to. * Once the vmalloc system is running, we use it. @@ -183,7 +196,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, err = 0; for (i = 0; i < size && err == 0; i += PAGE_SIZE) - err = map_kernel_page(v+i, p+i, flags); + err = map_kernel_page(v + i, p + i, prot); if (err) { if (slab_is_available()) vunmap((void *)v); @@ -209,7 +222,7 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) { pmd_t *pd; pte_t *pg; @@ -224,10 +237,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) /* The PTE should never be already set nor present in the * hash table */ - BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) && - flags); - set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, - __pgprot(flags))); + BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && pgprot_val(prot)); + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot)); } smp_wmb(); return err; @@ -238,7 +249,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) */ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) { - unsigned long v, s, f; + unsigned long v, s; phys_addr_t p; int ktext; @@ -248,11 +259,10 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) for (; s < top; s += PAGE_SIZE) { ktext = ((char *)v >= _stext && (char *)v < etext) || ((char *)v >= _sinittext && (char *)v < _einittext); - f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL); - map_kernel_page(v, p, f); + map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL); #ifdef CONFIG_PPC_STD_MMU_32 if (ktext) - hash_preload(&init_mm, v, 0, 0x300); + hash_preload(&init_mm, v, false, 0x300); #endif v += PAGE_SIZE; p += PAGE_SIZE; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 53e9eeecd5d4..fb1375c07e8c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -113,17 +113,12 @@ unsigned long ioremap_bot = IOREMAP_BASE; * __ioremap_at - Low level function to establish the page tables * for an IO mapping */ -void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, - unsigned long flags) +void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) { unsigned long i; - /* Make sure we have the base flags */ - if ((flags & _PAGE_PRESENT) == 0) - flags |= pgprot_val(PAGE_KERNEL); - /* We don't support the 4K PFN hack with ioremap */ - if (flags & H_PAGE_4K_PFN) + if (pgprot_val(prot) & H_PAGE_4K_PFN) return NULL; WARN_ON(pa & ~PAGE_MASK); @@ -131,7 +126,7 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, WARN_ON(size & ~PAGE_MASK); for (i = 0; i < size; i += PAGE_SIZE) - if (map_kernel_page((unsigned long)ea+i, pa+i, flags)) + if (map_kernel_page((unsigned long)ea + i, pa + i, prot)) return NULL; return (void __iomem *)ea; @@ -152,7 +147,7 @@ void __iounmap_at(void *ea, unsigned long size) } void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, - unsigned long flags, void *caller) + pgprot_t prot, void *caller) { phys_addr_t paligned; void __iomem *ret; @@ -182,11 +177,11 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, return NULL; area->phys_addr = paligned; - ret = __ioremap_at(paligned, area->addr, size, flags); + ret = __ioremap_at(paligned, area->addr, size, prot); if (!ret) vunmap(area->addr); } else { - ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags); + ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot); if (ret) ioremap_bot += size; } @@ -199,49 +194,59 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, void __iomem * __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) { - return __ioremap_caller(addr, size, flags, __builtin_return_address(0)); + return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0)); } void __iomem * ioremap(phys_addr_t addr, unsigned long size) { - unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0))); + pgprot_t prot = pgprot_noncached(PAGE_KERNEL); void *caller = __builtin_return_address(0); if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, flags, caller); - return __ioremap_caller(addr, size, flags, caller); + return ppc_md.ioremap(addr, size, prot, caller); + return __ioremap_caller(addr, size, prot, caller); } void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size) { - unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0))); + pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL); + void *caller = __builtin_return_address(0); + + if (ppc_md.ioremap) + return ppc_md.ioremap(addr, size, prot, caller); + return __ioremap_caller(addr, size, prot, caller); +} + +void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size) +{ + pgprot_t prot = pgprot_cached(PAGE_KERNEL); void *caller = __builtin_return_address(0); if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, flags, caller); - return __ioremap_caller(addr, size, flags, caller); + return ppc_md.ioremap(addr, size, prot, caller); + return __ioremap_caller(addr, size, prot, caller); } void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { + pte_t pte = __pte(flags); void *caller = __builtin_return_address(0); /* writeable implies dirty for kernel addresses */ - if (flags & _PAGE_WRITE) - flags |= _PAGE_DIRTY; + if (pte_write(pte)) + pte = pte_mkdirty(pte); /* we don't want to let _PAGE_EXEC leak out */ - flags &= ~_PAGE_EXEC; + pte = pte_exprotect(pte); /* * Force kernel mapping. */ - flags &= ~_PAGE_USER; - flags |= _PAGE_PRIVILEGED; + pte = pte_mkprivileged(pte); if (ppc_md.ioremap) - return ppc_md.ioremap(addr, size, flags, caller); - return __ioremap_caller(addr, size, flags, caller); + return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller); + return __ioremap_caller(addr, size, pte_pgprot(pte), caller); } @@ -306,7 +311,7 @@ struct page *pud_page(pud_t pud) */ struct page *pmd_page(pmd_t pmd) { - if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) + if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) return pte_page(pmd_pte(pmd)); return virt_to_page(pmd_page_vaddr(pmd)); } diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index bea6c544e38f..38a793bfca37 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -163,7 +163,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys, * Preload a translation in the hash table */ void hash_preload(struct mm_struct *mm, unsigned long ea, - unsigned long access, unsigned long trap) + bool is_exec, unsigned long trap) { pmd_t *pmd; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 9f574e59d178..c3fdf2969d9f 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -14,6 +14,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <asm/asm-prototypes.h> #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -30,11 +31,10 @@ enum slb_index { LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */ - VMALLOC_INDEX = 1, /* Kernel virtual map (0xd000000000000000) */ - KSTACK_INDEX = 2, /* Kernel stack map */ + KSTACK_INDEX = 1, /* Kernel stack map */ }; -extern void slb_allocate(unsigned long ea); +static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); #define slb_esid_mask(ssize) \ (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) @@ -45,13 +45,43 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; } -static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, +static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, unsigned long flags) { - return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags | + return (vsid << slb_vsid_shift(ssize)) | flags | ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); } +static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, + unsigned long flags) +{ + return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); +} + +static void assert_slb_exists(unsigned long ea) +{ +#ifdef CONFIG_DEBUG_VM + unsigned long tmp; + + WARN_ON_ONCE(mfmsr() & MSR_EE); + + asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); + WARN_ON(tmp == 0); +#endif +} + +static void assert_slb_notexists(unsigned long ea) +{ +#ifdef CONFIG_DEBUG_VM + unsigned long tmp; + + WARN_ON_ONCE(mfmsr() & MSR_EE); + + asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); + WARN_ON(tmp != 0); +#endif +} + static inline void slb_shadow_update(unsigned long ea, int ssize, unsigned long flags, enum slb_index index) @@ -84,6 +114,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, */ slb_shadow_update(ea, ssize, flags, index); + assert_slb_notexists(ea); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, ssize, flags)), "r" (mk_esid_data(ea, ssize, index)) @@ -105,17 +136,20 @@ void __slb_restore_bolted_realmode(void) : "r" (be64_to_cpu(p->save_area[index].vsid)), "r" (be64_to_cpu(p->save_area[index].esid))); } + + assert_slb_exists(local_paca->kstack); } /* * Insert the bolted entries into an empty SLB. - * This is not the same as rebolt because the bolted segments are not - * changed, just loaded from the shadow area. */ void slb_restore_bolted_realmode(void) { __slb_restore_bolted_realmode(); get_paca()->slb_cache_ptr = 0; + + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; } /* @@ -123,113 +157,262 @@ void slb_restore_bolted_realmode(void) */ void slb_flush_all_realmode(void) { - /* - * This flushes all SLB entries including 0, so it must be realmode. - */ asm volatile("slbmte %0,%0; slbia" : : "r" (0)); } -static void __slb_flush_and_rebolt(void) +/* + * This flushes non-bolted entries, it can be run in virtual mode. Must + * be called with interrupts disabled. + */ +void slb_flush_and_restore_bolted(void) { - /* If you change this make sure you change SLB_NUM_BOLTED - * and PR KVM appropriately too. */ - unsigned long linear_llp, vmalloc_llp, lflags, vflags; - unsigned long ksp_esid_data, ksp_vsid_data; + struct slb_shadow *p = get_slb_shadow(); - linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; - vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; - lflags = SLB_VSID_KERNEL | linear_llp; - vflags = SLB_VSID_KERNEL | vmalloc_llp; + BUILD_BUG_ON(SLB_NUM_BOLTED != 2); - ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX); - if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) { - ksp_esid_data &= ~SLB_ESID_V; - ksp_vsid_data = 0; - slb_shadow_clear(KSTACK_INDEX); - } else { - /* Update stack entry; others don't change */ - slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX); - ksp_vsid_data = - be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid); - } + WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); - /* We need to do this all in asm, so we're sure we don't touch - * the stack between the slbia and rebolting it. */ asm volatile("isync\n" "slbia\n" - /* Slot 1 - first VMALLOC segment */ - "slbmte %0,%1\n" - /* Slot 2 - kernel stack */ - "slbmte %2,%3\n" - "isync" - :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)), - "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, VMALLOC_INDEX)), - "r"(ksp_vsid_data), - "r"(ksp_esid_data) + "slbmte %0, %1\n" + "isync\n" + :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), + "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) : "memory"); + assert_slb_exists(get_paca()->kstack); + + get_paca()->slb_cache_ptr = 0; + + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; } -void slb_flush_and_rebolt(void) +void slb_save_contents(struct slb_entry *slb_ptr) { + int i; + unsigned long e, v; - WARN_ON(!irqs_disabled()); + /* Save slb_cache_ptr value. */ + get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr; + + if (!slb_ptr) + return; + + for (i = 0; i < mmu_slb_size; i++) { + asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i)); + slb_ptr->esid = e; + slb_ptr->vsid = v; + slb_ptr++; + } +} + +void slb_dump_contents(struct slb_entry *slb_ptr) +{ + int i, n; + unsigned long e, v; + unsigned long llp; + + if (!slb_ptr) + return; + + pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); + pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr); + + for (i = 0; i < mmu_slb_size; i++) { + e = slb_ptr->esid; + v = slb_ptr->vsid; + slb_ptr++; + + if (!e && !v) + continue; + + pr_err("%02d %016lx %016lx\n", i, e, v); + + if (!(e & SLB_ESID_V)) { + pr_err("\n"); + continue; + } + llp = v & SLB_VSID_LLP; + if (v & SLB_VSID_B_1T) { + pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n", + GET_ESID_1T(e), + (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp); + } else { + pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n", + GET_ESID(e), + (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp); + } + } + pr_err("----------------------------------\n"); + + /* Dump slb cache entires as well. */ + pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr); + pr_err("Valid SLB cache entries:\n"); + n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES); + for (i = 0; i < n; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); + pr_err("Rest of SLB cache entries:\n"); + for (i = n; i < SLB_CACHE_ENTRIES; i++) + pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]); +} +void slb_vmalloc_update(void) +{ /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. + * vmalloc is not bolted, so just have to flush non-bolted. */ - hard_irq_disable(); + slb_flush_and_restore_bolted(); +} - __slb_flush_and_rebolt(); - get_paca()->slb_cache_ptr = 0; +static bool preload_hit(struct thread_info *ti, unsigned long esid) +{ + unsigned char i; + + for (i = 0; i < ti->slb_preload_nr; i++) { + unsigned char idx; + + idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR; + if (esid == ti->slb_preload_esid[idx]) + return true; + } + return false; } -void slb_vmalloc_update(void) +static bool preload_add(struct thread_info *ti, unsigned long ea) { - unsigned long vflags; + unsigned char idx; + unsigned long esid; + + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { + /* EAs are stored >> 28 so 256MB segments don't need clearing */ + if (ea & ESID_MASK_1T) + ea &= ESID_MASK_1T; + } + + esid = ea >> SID_SHIFT; - vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp; - slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX); - slb_flush_and_rebolt(); + if (preload_hit(ti, esid)) + return false; + + idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR; + ti->slb_preload_esid[idx] = esid; + if (ti->slb_preload_nr == SLB_PRELOAD_NR) + ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR; + else + ti->slb_preload_nr++; + + return true; } -/* Helper function to compare esids. There are four cases to handle. - * 1. The system is not 1T segment size capable. Use the GET_ESID compare. - * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare. - * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match. - * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare. - */ -static inline int esids_match(unsigned long addr1, unsigned long addr2) +static void preload_age(struct thread_info *ti) { - int esid_1t_count; + if (!ti->slb_preload_nr) + return; + ti->slb_preload_nr--; + ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR; +} - /* System is not 1T segment size capable. */ - if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) - return (GET_ESID(addr1) == GET_ESID(addr2)); +void slb_setup_new_exec(void) +{ + struct thread_info *ti = current_thread_info(); + struct mm_struct *mm = current->mm; + unsigned long exec = 0x10000000; - esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + - ((addr2 >> SID_SHIFT_1T) != 0)); + WARN_ON(irqs_disabled()); - /* both addresses are < 1T */ - if (esid_1t_count == 0) - return (GET_ESID(addr1) == GET_ESID(addr2)); + /* + * preload cache can only be used to determine whether a SLB + * entry exists if it does not start to overflow. + */ + if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR) + return; - /* One address < 1T, the other > 1T. Not a match */ - if (esid_1t_count == 1) - return 0; + hard_irq_disable(); - /* Both addresses are > 1T. */ - return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2)); + /* + * We have no good place to clear the slb preload cache on exec, + * flush_thread is about the earliest arch hook but that happens + * after we switch to the mm and have aleady preloaded the SLBEs. + * + * For the most part that's probably okay to use entries from the + * previous exec, they will age out if unused. It may turn out to + * be an advantage to clear the cache before switching to it, + * however. + */ + + /* + * preload some userspace segments into the SLB. + * Almost all 32 and 64bit PowerPC executables are linked at + * 0x10000000 so it makes sense to preload this segment. + */ + if (!is_kernel_addr(exec)) { + if (preload_add(ti, exec)) + slb_allocate_user(mm, exec); + } + + /* Libraries and mmaps. */ + if (!is_kernel_addr(mm->mmap_base)) { + if (preload_add(ti, mm->mmap_base)) + slb_allocate_user(mm, mm->mmap_base); + } + + /* see switch_slb */ + asm volatile("isync" : : : "memory"); + + local_irq_enable(); } +void preload_new_slb_context(unsigned long start, unsigned long sp) +{ + struct thread_info *ti = current_thread_info(); + struct mm_struct *mm = current->mm; + unsigned long heap = mm->start_brk; + + WARN_ON(irqs_disabled()); + + /* see above */ + if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR) + return; + + hard_irq_disable(); + + /* Userspace entry address. */ + if (!is_kernel_addr(start)) { + if (preload_add(ti, start)) + slb_allocate_user(mm, start); + } + + /* Top of stack, grows down. */ + if (!is_kernel_addr(sp)) { + if (preload_add(ti, sp)) + slb_allocate_user(mm, sp); + } + + /* Bottom of heap, grows up. */ + if (heap && !is_kernel_addr(heap)) { + if (preload_add(ti, heap)) + slb_allocate_user(mm, heap); + } + + /* see switch_slb */ + asm volatile("isync" : : : "memory"); + + local_irq_enable(); +} + + /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - unsigned long offset; - unsigned long slbie_data = 0; - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long exec_base; + struct thread_info *ti = task_thread_info(tsk); + unsigned char i; /* * We need interrupts hard-disabled here, not just soft-disabled, @@ -238,91 +421,107 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) * which would update the slb_cache/slb_cache_ptr fields in the PACA. */ hard_irq_disable(); - offset = get_paca()->slb_cache_ptr; - if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && - offset <= SLB_CACHE_ENTRIES) { - int i; - asm volatile("isync" : : : "memory"); - for (i = 0; i < offset; i++) { - slbie_data = (unsigned long)get_paca()->slb_cache[i] - << SID_SHIFT; /* EA */ - slbie_data |= user_segment_size(slbie_data) - << SLBIE_SSIZE_SHIFT; - slbie_data |= SLBIE_C; /* C set for user addresses */ - asm volatile("slbie %0" : : "r" (slbie_data)); - } - asm volatile("isync" : : : "memory"); + asm volatile("isync" : : : "memory"); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * SLBIA IH=3 invalidates all Class=1 SLBEs and their + * associated lookaside structures, which matches what + * switch_slb wants. So ARCH_300 does not use the slb + * cache. + */ + asm volatile(PPC_SLBIA(3)); } else { - __slb_flush_and_rebolt(); - } + unsigned long offset = get_paca()->slb_cache_ptr; + + if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && + offset <= SLB_CACHE_ENTRIES) { + unsigned long slbie_data = 0; + + for (i = 0; i < offset; i++) { + unsigned long ea; + + ea = (unsigned long) + get_paca()->slb_cache[i] << SID_SHIFT; + /* + * Could assert_slb_exists here, but hypervisor + * or machine check could have come in and + * removed the entry at this point. + */ + + slbie_data = ea; + slbie_data |= user_segment_size(slbie_data) + << SLBIE_SSIZE_SHIFT; + slbie_data |= SLBIE_C; /* user slbs have C=1 */ + asm volatile("slbie %0" : : "r" (slbie_data)); + } + + /* Workaround POWER5 < DD2.1 issue */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1) + asm volatile("slbie %0" : : "r" (slbie_data)); + + } else { + struct slb_shadow *p = get_slb_shadow(); + unsigned long ksp_esid_data = + be64_to_cpu(p->save_area[KSTACK_INDEX].esid); + unsigned long ksp_vsid_data = + be64_to_cpu(p->save_area[KSTACK_INDEX].vsid); + + asm volatile(PPC_SLBIA(1) "\n" + "slbmte %0,%1\n" + "isync" + :: "r"(ksp_vsid_data), + "r"(ksp_esid_data)); + + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + } - /* Workaround POWER5 < DD2.1 issue */ - if (offset == 1 || offset > SLB_CACHE_ENTRIES) - asm volatile("slbie %0" : : "r" (slbie_data)); + get_paca()->slb_cache_ptr = 0; + } + get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; - get_paca()->slb_cache_ptr = 0; copy_mm_to_paca(mm); /* - * preload some userspace segments into the SLB. - * Almost all 32 and 64bit PowerPC executables are linked at - * 0x10000000 so it makes sense to preload this segment. + * We gradually age out SLBs after a number of context switches to + * reduce reload overhead of unused entries (like we do with FP/VEC + * reload). Each time we wrap 256 switches, take an entry out of the + * SLB preload cache. */ - exec_base = 0x10000000; - - if (is_kernel_addr(pc) || is_kernel_addr(stack) || - is_kernel_addr(exec_base)) - return; + tsk->thread.load_slb++; + if (!tsk->thread.load_slb) { + unsigned long pc = KSTK_EIP(tsk); - slb_allocate(pc); + preload_age(ti); + preload_add(ti, pc); + } - if (!esids_match(pc, stack)) - slb_allocate(stack); + for (i = 0; i < ti->slb_preload_nr; i++) { + unsigned char idx; + unsigned long ea; - if (!esids_match(pc, exec_base) && - !esids_match(stack, exec_base)) - slb_allocate(exec_base); -} + idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR; + ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT; -static inline void patch_slb_encoding(unsigned int *insn_addr, - unsigned int immed) -{ + slb_allocate_user(mm, ea); + } /* - * This function patches either an li or a cmpldi instruction with - * a new immediate value. This relies on the fact that both li - * (which is actually addi) and cmpldi both take a 16-bit immediate - * value, and it is situated in the same location in the instruction, - * ie. bits 16-31 (Big endian bit order) or the lower 16 bits. - * The signedness of the immediate operand differs between the two - * instructions however this code is only ever patching a small value, - * much less than 1 << 15, so we can get away with it. - * To patch the value we read the existing instruction, clear the - * immediate value, and or in our new value, then write the instruction - * back. + * Synchronize slbmte preloads with possible subsequent user memory + * address accesses by the kernel (user mode won't happen until + * rfid, which is safe). */ - unsigned int insn = (*insn_addr & 0xffff0000) | immed; - patch_instruction(insn_addr, insn); + asm volatile("isync" : : : "memory"); } -extern u32 slb_miss_kernel_load_linear[]; -extern u32 slb_miss_kernel_load_io[]; -extern u32 slb_compare_rr_to_size[]; -extern u32 slb_miss_kernel_load_vmemmap[]; - void slb_set_size(u16 size) { - if (mmu_slb_size == size) - return; - mmu_slb_size = size; - patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size); } void slb_initialize(void) { unsigned long linear_llp, vmalloc_llp, io_llp; - unsigned long lflags, vflags; + unsigned long lflags; static int slb_encoding_inited; #ifdef CONFIG_SPARSEMEM_VMEMMAP unsigned long vmemmap_llp; @@ -338,34 +537,24 @@ void slb_initialize(void) #endif if (!slb_encoding_inited) { slb_encoding_inited = 1; - patch_slb_encoding(slb_miss_kernel_load_linear, - SLB_VSID_KERNEL | linear_llp); - patch_slb_encoding(slb_miss_kernel_load_io, - SLB_VSID_KERNEL | io_llp); - patch_slb_encoding(slb_compare_rr_to_size, - mmu_slb_size); - pr_devel("SLB: linear LLP = %04lx\n", linear_llp); pr_devel("SLB: io LLP = %04lx\n", io_llp); - #ifdef CONFIG_SPARSEMEM_VMEMMAP - patch_slb_encoding(slb_miss_kernel_load_vmemmap, - SLB_VSID_KERNEL | vmemmap_llp); pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp); #endif } - get_paca()->stab_rr = SLB_NUM_BOLTED; + get_paca()->stab_rr = SLB_NUM_BOLTED - 1; + get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; + get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; lflags = SLB_VSID_KERNEL | linear_llp; - vflags = SLB_VSID_KERNEL | vmalloc_llp; /* Invalidate the entire SLB (even entry 0) & all the ERATS */ asm volatile("isync":::"memory"); asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX); - create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX); /* For the boot cpu, we're running on the stack in init_thread_union, * which is in the first segment of the linear mapping, and also @@ -381,122 +570,259 @@ void slb_initialize(void) asm volatile("isync":::"memory"); } -static void insert_slb_entry(unsigned long vsid, unsigned long ea, - int bpsize, int ssize) +static void slb_cache_update(unsigned long esid_data) { - unsigned long flags, vsid_data, esid_data; - enum slb_index index; int slb_cache_index; - /* - * We are irq disabled, hence should be safe to access PACA. - */ - VM_WARN_ON(!irqs_disabled()); - - /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. - */ - hard_irq_disable(); - - index = get_paca()->stab_rr; - - /* - * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. - */ - if (index < (mmu_slb_size - 1)) - index++; - else - index = SLB_NUM_BOLTED; - - get_paca()->stab_rr = index; - - flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; - vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); - esid_data = mk_esid_data(ea, ssize, index); - - /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing. - * Also we only handle user segments here. - */ - asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) - : "memory"); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; /* ISAv3.0B and later does not use slb_cache */ /* * Now update slb cache entries */ - slb_cache_index = get_paca()->slb_cache_ptr; + slb_cache_index = local_paca->slb_cache_ptr; if (slb_cache_index < SLB_CACHE_ENTRIES) { /* * We have space in slb cache for optimized switch_slb(). * Top 36 bits from esid_data as per ISA */ - get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28; - get_paca()->slb_cache_ptr++; + local_paca->slb_cache[slb_cache_index++] = esid_data >> 28; + local_paca->slb_cache_ptr++; } else { /* * Our cache is full and the current cache content strictly * doesn't indicate the active SLB conents. Bump the ptr * so that switch_slb() will ignore the cache. */ - get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; + local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; } } -static void handle_multi_context_slb_miss(int context_id, unsigned long ea) +static enum slb_index alloc_slb_index(bool kernel) { - struct mm_struct *mm = current->mm; - unsigned long vsid; - int bpsize; + enum slb_index index; /* - * We are always above 1TB, hence use high user segment size. + * The allocation bitmaps can become out of synch with the SLB + * when the _switch code does slbie when bolting a new stack + * segment and it must not be anywhere else in the SLB. This leaves + * a kernel allocated entry that is unused in the SLB. With very + * large systems or small segment sizes, the bitmaps could slowly + * fill with these entries. They will eventually be cleared out + * by the round robin allocator in that case, so it's probably not + * worth accounting for. */ - vsid = get_vsid(context_id, ea, mmu_highuser_ssize); - bpsize = get_slice_psize(mm, ea); - insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); + + /* + * SLBs beyond 32 entries are allocated with stab_rr only + * POWER7/8/9 have 32 SLB entries, this could be expanded if a + * future CPU has more. + */ + if (local_paca->slb_used_bitmap != U32_MAX) { + index = ffz(local_paca->slb_used_bitmap); + local_paca->slb_used_bitmap |= 1U << index; + if (kernel) + local_paca->slb_kern_bitmap |= 1U << index; + } else { + /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */ + index = local_paca->stab_rr; + if (index < (mmu_slb_size - 1)) + index++; + else + index = SLB_NUM_BOLTED; + local_paca->stab_rr = index; + if (index < 32) { + if (kernel) + local_paca->slb_kern_bitmap |= 1U << index; + else + local_paca->slb_kern_bitmap &= ~(1U << index); + } + } + BUG_ON(index < SLB_NUM_BOLTED); + + return index; } -void slb_miss_large_addr(struct pt_regs *regs) +static long slb_insert_entry(unsigned long ea, unsigned long context, + unsigned long flags, int ssize, bool kernel) { - enum ctx_state prev_state = exception_enter(); - unsigned long ea = regs->dar; - int context; + unsigned long vsid; + unsigned long vsid_data, esid_data; + enum slb_index index; - if (REGION_ID(ea) != USER_REGION_ID) - goto slb_bad_addr; + vsid = get_vsid(context, ea, ssize); + if (!vsid) + return -EFAULT; /* - * Are we beyound what the page table layout supports ? + * There must not be a kernel SLB fault in alloc_slb_index or before + * slbmte here or the allocation bitmaps could get out of whack with + * the SLB. + * + * User SLB faults or preloads take this path which might get inlined + * into the caller, so add compiler barriers here to ensure unsafe + * memory accesses do not come between. */ - if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) - goto slb_bad_addr; + barrier(); - /* Lower address should have been handled by asm code */ - if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) - goto slb_bad_addr; + index = alloc_slb_index(kernel); + + vsid_data = __mk_vsid_data(vsid, ssize, flags); + esid_data = mk_esid_data(ea, ssize, index); + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + * User preloads should add isync afterwards in case the kernel + * accesses user memory before it returns to userspace with rfid. + */ + assert_slb_notexists(ea); + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); + + barrier(); + + if (!kernel) + slb_cache_update(esid_data); + + return 0; +} + +static long slb_allocate_kernel(unsigned long ea, unsigned long id) +{ + unsigned long context; + unsigned long flags; + int ssize; + + if (id == KERNEL_REGION_ID) { + + /* We only support upto MAX_PHYSMEM_BITS */ + if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS)) + return -EFAULT; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; + +#ifdef CONFIG_SPARSEMEM_VMEMMAP + } else if (id == VMEMMAP_REGION_ID) { + + if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) + return -EFAULT; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; +#endif + } else if (id == VMALLOC_REGION_ID) { + + if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) + return -EFAULT; + + if (ea < H_VMALLOC_END) + flags = get_paca()->vmalloc_sllp; + else + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; + } else { + return -EFAULT; + } + + ssize = MMU_SEGSIZE_1T; + if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) + ssize = MMU_SEGSIZE_256M; + + context = get_kernel_context(ea); + return slb_insert_entry(ea, context, flags, ssize, true); +} + +static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) +{ + unsigned long context; + unsigned long flags; + int bpsize; + int ssize; /* * consider this as bad access if we take a SLB miss * on an address above addr limit. */ - if (ea >= current->mm->context.slb_addr_limit) - goto slb_bad_addr; + if (ea >= mm->context.slb_addr_limit) + return -EFAULT; - context = get_ea_context(¤t->mm->context, ea); + context = get_user_context(&mm->context, ea); if (!context) - goto slb_bad_addr; + return -EFAULT; + + if (unlikely(ea >= H_PGTABLE_RANGE)) { + WARN_ON(1); + return -EFAULT; + } - handle_multi_context_slb_miss(context, ea); - exception_exit(prev_state); - return; + ssize = user_segment_size(ea); -slb_bad_addr: - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); - else - bad_page_fault(regs, ea, SIGSEGV); - exception_exit(prev_state); + bpsize = get_slice_psize(mm, ea); + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; + + return slb_insert_entry(ea, context, flags, ssize, false); +} + +long do_slb_fault(struct pt_regs *regs, unsigned long ea) +{ + unsigned long id = REGION_ID(ea); + + /* IRQs are not reconciled here, so can't check irqs_disabled */ + VM_WARN_ON(mfmsr() & MSR_EE); + + if (unlikely(!(regs->msr & MSR_RI))) + return -EINVAL; + + /* + * SLB kernel faults must be very careful not to touch anything + * that is not bolted. E.g., PACA and global variables are okay, + * mm->context stuff is not. + * + * SLB user faults can access all of kernel memory, but must be + * careful not to touch things like IRQ state because it is not + * "reconciled" here. The difficulty is that we must use + * fast_exception_return to return from kernel SLB faults without + * looking at possible non-bolted memory. We could test user vs + * kernel faults in the interrupt handler asm and do a full fault, + * reconcile, ret_from_except for user faults which would make them + * first class kernel code. But for performance it's probably nicer + * if they go via fast_exception_return too. + */ + if (id >= KERNEL_REGION_ID) { + long err; +#ifdef CONFIG_DEBUG_VM + /* Catch recursive kernel SLB faults. */ + BUG_ON(local_paca->in_kernel_slb_handler); + local_paca->in_kernel_slb_handler = 1; +#endif + err = slb_allocate_kernel(ea, id); +#ifdef CONFIG_DEBUG_VM + local_paca->in_kernel_slb_handler = 0; +#endif + return err; + } else { + struct mm_struct *mm = current->mm; + long err; + + if (unlikely(!mm)) + return -EFAULT; + + err = slb_allocate_user(mm, ea); + if (!err) + preload_add(current_thread_info(), ea); + + return err; + } +} + +void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) +{ + if (err == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + else + bad_page_fault(regs, ea, SIGSEGV); + } else if (err == -EINVAL) { + unrecoverable_exception(regs); + } else { + BUG(); + } } diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S deleted file mode 100644 index 4ac5057ad439..000000000000 --- a/arch/powerpc/mm/slb_low.S +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Low-level SLB routines - * - * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM - * - * Based on earlier C version: - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/processor.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cputable.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -#include <asm/firmware.h> -#include <asm/feature-fixups.h> - -/* - * This macro generates asm code to compute the VSID scramble - * function. Used in slb_allocate() and do_stab_bolted. The function - * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS - * - * rt = register containing the proto-VSID and into which the - * VSID will be stored - * rx = scratch register (clobbered) - * rf = flags - * - * - rt and rx must be different registers - * - The answer will end up in the low VSID_BITS bits of rt. The higher - * bits may contain other garbage, so you may need to mask the - * result. - */ -#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \ - lis rx,VSID_MULTIPLIER_##size@h; \ - ori rx,rx,VSID_MULTIPLIER_##size@l; \ - mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ -/* \ - * powermac get slb fault before feature fixup, so make 65 bit part \ - * the default part of feature fixup \ - */ \ -BEGIN_MMU_FTR_SECTION \ - srdi rx,rt,VSID_BITS_65_##size; \ - clrldi rt,rt,(64-VSID_BITS_65_##size); \ - add rt,rt,rx; \ - addi rx,rt,1; \ - srdi rx,rx,VSID_BITS_65_##size; \ - add rt,rt,rx; \ - rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \ -MMU_FTR_SECTION_ELSE \ - srdi rx,rt,VSID_BITS_##size; \ - clrldi rt,rt,(64-VSID_BITS_##size); \ - add rt,rt,rx; /* add high and low bits */ \ - addi rx,rt,1; \ - srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ - add rt,rt,rx; \ - rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \ -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) - - -/* void slb_allocate(unsigned long ea); - * - * Create an SLB entry for the given EA (user or kernel). - * r3 = faulting address, r13 = PACA - * r9, r10, r11 are clobbered by this function - * r3 is preserved. - * No other registers are examined or changed. - */ -_GLOBAL(slb_allocate) - /* - * Check if the address falls within the range of the first context, or - * if we may need to handle multi context. For the first context we - * allocate the slb entry via the fast path below. For large address we - * branch out to C-code and see if additional contexts have been - * allocated. - * The test here is: - * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) - */ - rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) - bne- 8f - - srdi r9,r3,60 /* get region */ - srdi r10,r3,SID_SHIFT /* get esid */ - cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ - - /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ - blt cr7,0f /* user or kernel? */ - - /* Check if hitting the linear mapping or some other kernel space - */ - bne cr7,1f - - /* Linear mapping encoding bits, the "li" instruction below will - * be patched by the kernel at boot - */ -.globl slb_miss_kernel_load_linear -slb_miss_kernel_load_linear: - li r11,0 - /* - * context = (ea >> 60) - (0xc - 1) - * r9 = region id. - */ - subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET - -BEGIN_FTR_SECTION - b .Lslb_finish_load -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) - b .Lslb_finish_load_1T - -1: -#ifdef CONFIG_SPARSEMEM_VMEMMAP - cmpldi cr0,r9,0xf - bne 1f -/* Check virtual memmap region. To be patched at kernel boot */ -.globl slb_miss_kernel_load_vmemmap -slb_miss_kernel_load_vmemmap: - li r11,0 - b 6f -1: -#endif /* CONFIG_SPARSEMEM_VMEMMAP */ - - /* - * r10 contains the ESID, which is the original faulting EA shifted - * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28) - * which is 0xd00038000. That can't be used as an immediate, even if we - * ignored the 0xd, so we have to load it into a register, and we only - * have one register free. So we must load all of (H_VMALLOC_END >> 28) - * into a register and compare ESID against that. - */ - lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000 - ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800 - // Rotate left 4, then mask with 0xffffffff0 - rldic r11,r11,4,28 // r11 = 0xd00038000 - cmpld r10,r11 // if r10 >= r11 - bge 5f // goto io_mapping - - /* - * vmalloc mapping gets the encoding from the PACA as the mapping - * can be demoted from 64K -> 4K dynamically on some machines. - */ - lhz r11,PACAVMALLOCSLLP(r13) - b 6f -5: - /* IO mapping */ -.globl slb_miss_kernel_load_io -slb_miss_kernel_load_io: - li r11,0 -6: - /* - * context = (ea >> 60) - (0xc - 1) - * r9 = region id. - */ - subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET - -BEGIN_FTR_SECTION - b .Lslb_finish_load -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) - b .Lslb_finish_load_1T - -0: /* - * For userspace addresses, make sure this is region 0. - */ - cmpdi r9, 0 - bne- 8f - /* - * user space make sure we are within the allowed limit - */ - ld r11,PACA_SLB_ADDR_LIMIT(r13) - cmpld r3,r11 - bge- 8f - - /* when using slices, we extract the psize off the slice bitmaps - * and then we need to get the sllp encoding off the mmu_psize_defs - * array. - * - * XXX This is a bit inefficient especially for the normal case, - * so we should try to implement a fast path for the standard page - * size using the old sllp value so we avoid the array. We cannot - * really do dynamic patching unfortunately as processes might flip - * between 4k and 64k standard page size - */ -#ifdef CONFIG_PPC_MM_SLICES - /* r10 have esid */ - cmpldi r10,16 - /* below SLICE_LOW_TOP */ - blt 5f - /* - * Handle hpsizes, - * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index - */ - srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */ - addi r9,r11,PACAHIGHSLICEPSIZE - lbzx r9,r13,r9 /* r9 is hpsizes[r11] */ - /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */ - rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63 - b 6f - -5: - /* - * Handle lpsizes - * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index - */ - srdi r11,r10,1 /* index */ - addi r9,r11,PACALOWSLICESPSIZE - lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ - rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ -6: - sldi r11,r11,2 /* index * 4 */ - /* Extract the psize and multiply to get an array offset */ - srd r9,r9,r11 - andi. r9,r9,0xf - mulli r9,r9,MMUPSIZEDEFSIZE - - /* Now get to the array and obtain the sllp - */ - ld r11,PACATOC(r13) - ld r11,mmu_psize_defs@got(r11) - add r11,r11,r9 - ld r11,MMUPSIZESLLP(r11) - ori r11,r11,SLB_VSID_USER -#else - /* paca context sllp already contains the SLB_VSID_USER bits */ - lhz r11,PACACONTEXTSLLP(r13) -#endif /* CONFIG_PPC_MM_SLICES */ - - ld r9,PACACONTEXTID(r13) -BEGIN_FTR_SECTION - cmpldi r10,0x1000 - bge .Lslb_finish_load_1T -END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - b .Lslb_finish_load - -8: /* invalid EA - return an error indication */ - crset 4*cr0+eq /* indicate failure */ - blr - -/* - * Finish loading of an SLB entry and return - * - * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET - */ -.Lslb_finish_load: - rldimi r10,r9,ESID_BITS,0 - ASM_VSID_SCRAMBLE(r10,r9,r11,256M) - /* r3 = EA, r11 = VSID data */ - /* - * Find a slot, round robin. Previously we tried to find a - * free slot first but that took too long. Unfortunately we - * dont have any LRU information to help us choose a slot. - */ - - mr r9,r3 - - /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ -7: ld r10,PACASTABRR(r13) - addi r10,r10,1 - /* This gets soft patched on boot. */ -.globl slb_compare_rr_to_size -slb_compare_rr_to_size: - cmpldi r10,0 - - blt+ 4f - li r10,SLB_NUM_BOLTED - -4: - std r10,PACASTABRR(r13) - -3: - rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ - oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ - - /* r9 = ESID data, r11 = VSID data */ - - /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing. - */ - slbmte r11,r10 - - /* we're done for kernel addresses */ - crclr 4*cr0+eq /* set result to "success" */ - bgelr cr7 - - /* Update the slb cache */ - lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ - cmpldi r9,SLB_CACHE_ENTRIES - bge 1f - - /* still room in the slb cache */ - sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ - srdi r10,r10,28 /* get the 36 bits of the ESID */ - add r11,r11,r13 /* r11 = (u32 *)paca + offset */ - stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ - addi r9,r9,1 /* offset++ */ - b 2f -1: /* offset >= SLB_CACHE_ENTRIES */ - li r9,SLB_CACHE_ENTRIES+1 -2: - sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ - crclr 4*cr0+eq /* set result to "success" */ - blr - -/* - * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. - * - * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 - */ -.Lslb_finish_load_1T: - srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ - rldimi r10,r9,ESID_BITS_1T,0 - ASM_VSID_SCRAMBLE(r10,r9,r11,1T) - - li r10,MMU_SEGSIZE_1T - rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ - - /* r3 = EA, r11 = VSID data */ - clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ - b 7b - - -_ASM_NOKPROBE_SYMBOL(slb_allocate) -_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) -_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) -_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) -#ifdef CONFIG_SPARSEMEM_VMEMMAP -_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap) -#endif diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 205fe557ca10..06898c13901d 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -31,6 +31,7 @@ #include <linux/spinlock.h> #include <linux/export.h> #include <linux/hugetlb.h> +#include <linux/sched/mm.h> #include <asm/mman.h> #include <asm/mmu.h> #include <asm/copro.h> @@ -61,6 +62,13 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) { #endif +static inline bool slice_addr_is_low(unsigned long addr) +{ + u64 tmp = (u64)addr; + + return tmp < SLICE_LOW_TOP; +} + static void slice_range_to_mask(unsigned long start, unsigned long len, struct slice_mask *ret) { @@ -70,7 +78,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len, if (SLICE_NUM_HIGH) bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); - if (start < SLICE_LOW_TOP) { + if (slice_addr_is_low(start)) { unsigned long mend = min(end, (unsigned long)(SLICE_LOW_TOP - 1)); @@ -78,7 +86,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len, - (1u << GET_LOW_SLICE_INDEX(start)); } - if ((start + len) > SLICE_LOW_TOP) { + if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) { unsigned long start_index = GET_HIGH_SLICE_INDEX(start); unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; @@ -133,7 +141,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, if (!slice_low_has_vma(mm, i)) ret->low_slices |= 1u << i; - if (high_limit <= SLICE_LOW_TOP) + if (slice_addr_is_low(high_limit - 1)) return; for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) @@ -182,7 +190,7 @@ static bool slice_check_range_fits(struct mm_struct *mm, unsigned long end = start + len - 1; u64 low_slices = 0; - if (start < SLICE_LOW_TOP) { + if (slice_addr_is_low(start)) { unsigned long mend = min(end, (unsigned long)(SLICE_LOW_TOP - 1)); @@ -192,7 +200,7 @@ static bool slice_check_range_fits(struct mm_struct *mm, if ((low_slices & available->low_slices) != low_slices) return false; - if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { + if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) { unsigned long start_index = GET_HIGH_SLICE_INDEX(start); unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; @@ -219,7 +227,7 @@ static void slice_flush_segments(void *parm) copy_mm_to_paca(current->active_mm); local_irq_save(flags); - slb_flush_and_rebolt(); + slb_flush_and_restore_bolted(); local_irq_restore(flags); #endif } @@ -303,7 +311,7 @@ static bool slice_scan_available(unsigned long addr, int end, unsigned long *boundary_addr) { unsigned long slice; - if (addr < SLICE_LOW_TOP) { + if (slice_addr_is_low(addr)) { slice = GET_LOW_SLICE_INDEX(addr); *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; return !!(available->low_slices & (1u << slice)); @@ -706,7 +714,7 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) VM_BUG_ON(radix_enabled()); - if (addr < SLICE_LOW_TOP) { + if (slice_addr_is_low(addr)) { psizes = mm->context.low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); } else { @@ -757,6 +765,20 @@ void slice_init_new_context_exec(struct mm_struct *mm) bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); } +#ifdef CONFIG_PPC_BOOK3S_64 +void slice_setup_new_exec(void) +{ + struct mm_struct *mm = current->mm; + + slice_dbg("slice_setup_new_exec(mm=%p)\n", mm); + + if (!is_32bit_task()) + return; + + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; +} +#endif + void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize) { diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 4c4dfc473800..6a23b9ebd2a1 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -366,6 +366,7 @@ static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); asm volatile("ptesync": : :"memory"); + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } @@ -1016,7 +1017,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) goto local; } _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); - goto local; } else { local: _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 15fe5f0c8665..ae5d568e267f 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -503,6 +503,9 @@ static void setup_page_sizes(void) for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; + if (!def->shift) + continue; + if (tlb1ps & (1U << (def->shift - 10))) { def->flags |= MMU_PAGE_SIZE_DIRECT; diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 7a7834c39f64..8d26d7416481 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 82986d2acd9b..ab26df5bacb9 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 1fafc32b12a0..6954636b16d1 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1392,7 +1392,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id if (ret) goto err_free_cpuhp_mem; - pr_info("%s performance monitor hardware support registered\n", + pr_debug("%s performance monitor hardware support registered\n", pmu_ptr->pmu.name); return 0; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 7963658dbc22..6dbae9884ec4 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -238,6 +238,7 @@ static int power7_marked_instr_event(u64 event) case 6: if (psel == 0x64) return pmc >= 3; + break; case 8: return unit == 0xd; } diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index 60254a321a91..2a9d66254ffc 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -2,7 +2,6 @@ config ACADIA bool "Acadia" depends on 40x - default n select PPC40x_SIMPLE select 405EZ help @@ -11,7 +10,6 @@ config ACADIA config EP405 bool "EP405/EP405PC" depends on 40x - default n select 405GP select PCI help @@ -20,7 +18,6 @@ config EP405 config HOTFOOT bool "Hotfoot" depends on 40x - default n select PPC40x_SIMPLE select PCI help @@ -29,7 +26,6 @@ config HOTFOOT config KILAUEA bool "Kilauea" depends on 40x - default n select 405EX select PPC40x_SIMPLE select PPC4xx_PCI_EXPRESS @@ -41,7 +37,6 @@ config KILAUEA config MAKALU bool "Makalu" depends on 40x - default n select 405EX select PCI select PPC4xx_PCI_EXPRESS @@ -62,7 +57,6 @@ config WALNUT config XILINX_VIRTEX_GENERIC_BOARD bool "Generic Xilinx Virtex board" depends on 40x - default n select XILINX_VIRTEX_II_PRO select XILINX_VIRTEX_4_FX select XILINX_INTC @@ -80,7 +74,6 @@ config XILINX_VIRTEX_GENERIC_BOARD config OBS600 bool "OpenBlockS 600" depends on 40x - default n select 405EX select PPC40x_SIMPLE help @@ -90,7 +83,6 @@ config OBS600 config PPC40x_SIMPLE bool "Simple PowerPC 40x board support" depends on 40x - default n help This option enables the simple PowerPC 40x platform support. @@ -156,7 +148,6 @@ config IBM405_ERR51 config APM8018X bool "APM8018X" depends on 40x - default n select PPC40x_SIMPLE help This option enables support for the AppliedMicro APM8018X evaluation diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index a6011422b861..f024efd5a4c2 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -2,7 +2,6 @@ config PPC_47x bool "Support for 47x variant" depends on 44x - default n select MPIC help This option enables support for the 47x family of processors and is @@ -11,7 +10,6 @@ config PPC_47x config BAMBOO bool "Bamboo" depends on 44x - default n select PPC44x_SIMPLE select 440EP select PCI @@ -21,7 +19,6 @@ config BAMBOO config BLUESTONE bool "Bluestone" depends on 44x - default n select PPC44x_SIMPLE select APM821xx select PCI_MSI @@ -44,7 +41,6 @@ config EBONY config SAM440EP bool "Sam440ep" depends on 44x - default n select 440EP select PCI help @@ -53,7 +49,6 @@ config SAM440EP config SEQUOIA bool "Sequoia" depends on 44x - default n select PPC44x_SIMPLE select 440EPX help @@ -62,7 +57,6 @@ config SEQUOIA config TAISHAN bool "Taishan" depends on 44x - default n select PPC44x_SIMPLE select 440GX select PCI @@ -73,7 +67,6 @@ config TAISHAN config KATMAI bool "Katmai" depends on 44x - default n select PPC44x_SIMPLE select 440SPe select PCI @@ -86,7 +79,6 @@ config KATMAI config RAINIER bool "Rainier" depends on 44x - default n select PPC44x_SIMPLE select 440GRX select PCI @@ -96,7 +88,6 @@ config RAINIER config WARP bool "PIKA Warp" depends on 44x - default n select 440EP help This option enables support for the PIKA Warp(tm) Appliance. The Warp @@ -109,7 +100,6 @@ config WARP config ARCHES bool "Arches" depends on 44x - default n select PPC44x_SIMPLE select 460EX # Odd since it uses 460GT but the effects are the same select PCI @@ -120,7 +110,6 @@ config ARCHES config CANYONLANDS bool "Canyonlands" depends on 44x - default n select 460EX select PCI select PPC4xx_PCI_EXPRESS @@ -134,7 +123,6 @@ config CANYONLANDS config GLACIER bool "Glacier" depends on 44x - default n select PPC44x_SIMPLE select 460EX # Odd since it uses 460GT but the effects are the same select PCI @@ -147,7 +135,6 @@ config GLACIER config REDWOOD bool "Redwood" depends on 44x - default n select PPC44x_SIMPLE select 460SX select PCI @@ -160,7 +147,6 @@ config REDWOOD config EIGER bool "Eiger" depends on 44x - default n select PPC44x_SIMPLE select 460SX select PCI @@ -172,7 +158,6 @@ config EIGER config YOSEMITE bool "Yosemite" depends on 44x - default n select PPC44x_SIMPLE select 440EP select PCI @@ -182,7 +167,6 @@ config YOSEMITE config ISS4xx bool "ISS 4xx Simulator" depends on (44x || 40x) - default n select 405GP if 40x select 440GP if 44x && !PPC_47x select PPC_FPU @@ -193,7 +177,6 @@ config ISS4xx config CURRITUCK bool "IBM Currituck (476fpe) Support" depends on PPC_47x - default n select SWIOTLB select 476FPE select PPC4xx_PCI_EXPRESS @@ -203,7 +186,6 @@ config CURRITUCK config FSP2 bool "IBM FSP2 (476fpe) Support" depends on PPC_47x - default n select 476FPE select IBM_EMAC_EMAC4 if IBM_EMAC select IBM_EMAC_RGMII if IBM_EMAC @@ -215,7 +197,6 @@ config FSP2 config AKEBONO bool "IBM Akebono (476gtr) Support" depends on PPC_47x - default n select SWIOTLB select 476FPE select PPC4xx_PCI_EXPRESS @@ -241,7 +222,6 @@ config AKEBONO config ICON bool "Icon" depends on 44x - default n select PPC44x_SIMPLE select 440SPe select PCI @@ -252,7 +232,6 @@ config ICON config XILINX_VIRTEX440_GENERIC_BOARD bool "Generic Xilinx Virtex 5 FXT board support" depends on 44x - default n select XILINX_VIRTEX_5_FXT select XILINX_INTC help @@ -280,7 +259,6 @@ config XILINX_ML510 config PPC44x_SIMPLE bool "Simple PowerPC 44x board support" depends on 44x - default n help This option enables the simple PowerPC 44x platform support. diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index 04f0c73a9b4f..7a507f775308 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -210,15 +210,15 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) for_each_compatible_node(np, NULL, compat) { irq = irq_of_parse_and_map(np, 0); if (irq == NO_IRQ) { - pr_err("device tree node %s is missing a interrupt", - np->name); + pr_err("device tree node %pOFn is missing a interrupt", + np); return; } rc = request_irq(irq, errirq_handler, 0, np->name, np); if (rc) { - pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d", - np->full_name, rc); + pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", + np, rc); return; } } diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c index 69d9f60d9fe5..f5bbd4563342 100644 --- a/arch/powerpc/platforms/4xx/ocm.c +++ b/arch/powerpc/platforms/4xx/ocm.c @@ -113,7 +113,6 @@ static void __init ocm_init_node(int count, struct device_node *node) int len; struct resource rsrc; - int ioflags; ocm = ocm_get_node(count); @@ -179,9 +178,8 @@ static void __init ocm_init_node(int count, struct device_node *node) /* ioremap the non-cached region */ if (ocm->nc.memtotal) { - ioflags = _PAGE_NO_CACHE | _PAGE_GUARDED | _PAGE_EXEC; ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal, - ioflags); + _PAGE_EXEC | PAGE_KERNEL_NCG); if (!ocm->nc.virt) { printk(KERN_ERR @@ -195,9 +193,8 @@ static void __init ocm_init_node(int count, struct device_node *node) /* ioremap the cached region */ if (ocm->c.memtotal) { - ioflags = _PAGE_EXEC; ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal, - ioflags); + _PAGE_EXEC | PAGE_KERNEL); if (!ocm->c.virt) { printk(KERN_ERR diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c index 5e36508b2a70..1844bf502fcf 100644 --- a/arch/powerpc/platforms/4xx/soc.c +++ b/arch/powerpc/platforms/4xx/soc.c @@ -200,7 +200,7 @@ void ppc4xx_reset_system(char *cmd) u32 reset_type = DBCR0_RST_SYSTEM; const u32 *prop; - np = of_find_node_by_type(NULL, "cpu"); + np = of_get_cpu_node(0, NULL); if (np) { prop = of_get_property(np, "reset-type", NULL); diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index 6e04099361b9..1947a88bc69f 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -51,7 +51,6 @@ endif config PQ2ADS bool - default n config 8260 bool diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 7e966f4cf19a..fff72425727a 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -216,8 +216,8 @@ static int smp_85xx_start_cpu(int cpu) /* Map the spin table */ if (ioremappable) - spin_table = ioremap_prot(*cpu_rel_addr, - sizeof(struct epapr_spin_table), _PAGE_COHERENT); + spin_table = ioremap_coherent(*cpu_rel_addr, + sizeof(struct epapr_spin_table)); else spin_table = phys_to_virt(*cpu_rel_addr); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 027c42d8966c..f1c805c8adbc 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -66,7 +66,7 @@ static int __init get_freq(char *name, unsigned long *val) int found = 0; /* The cpu node should have timebase and clock frequency properties */ - cpu = of_find_node_by_type(NULL, "cpu"); + cpu = of_get_cpu_node(0, NULL); if (cpu) { fp = of_get_property(cpu, name, NULL); @@ -147,8 +147,9 @@ void __init mpc8xx_calibrate_decr(void) * we have to enable the timebase). The decrementer interrupt * is wired into the vector table, nothing to do here for that. */ - cpu = of_find_node_by_type(NULL, "cpu"); + cpu = of_get_cpu_node(0, NULL); virq= irq_of_parse_and_map(cpu, 0); + of_node_put(cpu); irq = virq_to_hw(virq); sys_tmr2 = immr_map(im_sit); diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c index 402016705a39..9944fc303df0 100644 --- a/arch/powerpc/platforms/8xx/machine_check.c +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -18,9 +18,9 @@ int machine_check_8xx(struct pt_regs *regs) pr_err("Machine check in kernel mode.\n"); pr_err("Caused by (from SRR1=%lx): ", reason); if (reason & 0x40000000) - pr_err("Fetch error at address %lx\n", regs->nip); + pr_cont("Fetch error at address %lx\n", regs->nip); else - pr_err("Data access error at address %lx\n", regs->dar); + pr_cont("Data access error at address %lx\n", regs->dar); #ifdef CONFIG_PCI /* the qspan pci read routines can cause machine checks -- Cort diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 14ef17e10ec9..260a56b7602d 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -23,7 +23,6 @@ source "arch/powerpc/platforms/amigaone/Kconfig" config KVM_GUEST bool "KVM Guest support" - default n select EPAPR_PARAVIRT ---help--- This option enables various optimizations for running under the KVM @@ -34,7 +33,6 @@ config KVM_GUEST config EPAPR_PARAVIRT bool "ePAPR para-virtualization support" - default n help Enables ePAPR para-virtualization support for guests. @@ -74,7 +72,6 @@ config PPC_DT_CPU_FTRS config UDBG_RTAS_CONSOLE bool "RTAS based debug console" depends on PPC_RTAS - default n config PPC_SMP_MUXED_IPI bool @@ -86,16 +83,13 @@ config PPC_SMP_MUXED_IPI config IPIC bool - default n config MPIC bool - default n config MPIC_TIMER bool "MPIC Global Timer" depends on MPIC && FSL_SOC - default n help The MPIC global timer is a hardware timer inside the Freescale PIC complying with OpenPIC standard. When the @@ -107,7 +101,6 @@ config MPIC_TIMER config FSL_MPIC_TIMER_WAKEUP tristate "Freescale MPIC global timer wakeup driver" depends on FSL_SOC && MPIC_TIMER && PM - default n help The driver provides a way to wake up the system by MPIC timer. @@ -115,43 +108,35 @@ config FSL_MPIC_TIMER_WAKEUP config PPC_EPAPR_HV_PIC bool - default n select EPAPR_PARAVIRT config MPIC_WEIRD bool - default n config MPIC_MSGR bool "MPIC message register support" depends on MPIC - default n help Enables support for the MPIC message registers. These registers are used for inter-processor communication. config PPC_I8259 bool - default n config U3_DART bool depends on PPC64 - default n config PPC_RTAS bool - default n config RTAS_ERROR_LOGGING bool depends on PPC_RTAS - default n config PPC_RTAS_DAEMON bool depends on PPC_RTAS - default n config RTAS_PROC bool "Proc interface to RTAS" @@ -164,11 +149,9 @@ config RTAS_FLASH config MMIO_NVRAM bool - default n config MPIC_U3_HT_IRQS bool - default n config MPIC_BROKEN_REGREAD bool @@ -187,15 +170,12 @@ config EEH config PPC_MPC106 bool - default n config PPC_970_NAP bool - default n config PPC_P7_NAP bool - default n config PPC_INDIRECT_PIO bool @@ -295,7 +275,6 @@ config CPM2 config FSL_ULI1575 bool - default n select GENERIC_ISA_DMA help Supports for the ULI1575 PCIe south bridge that exists on some diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 6c6a7c72cae4..f4e2c5729374 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config PPC64 bool "64-bit kernel" - default n select ZLIB_DEFLATE help This option selects whether a 32-bit or a 64-bit kernel @@ -72,6 +71,7 @@ config PPC_BOOK3S_64 select PPC_HAVE_PMU_SUPPORT select SYS_SUPPORTS_HUGETLBFS select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK @@ -368,7 +368,6 @@ config PPC_MM_SLICES bool default y if PPC_BOOK3S_64 default y if PPC_8xx && HUGETLB_PAGE - default n config PPC_HAVE_PMU_SUPPORT bool @@ -382,7 +381,6 @@ config PPC_PERF_CTRS config FORCE_SMP # Allow platforms to force SMP=y by selecting this bool - default n select SMP config SMP @@ -423,7 +421,6 @@ config CHECK_CACHE_COHERENCY config PPC_DOORBELL bool - default n endmenu diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index e46bb7ea710f..143d4417f6cc 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror - obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o obj-$(CONFIG_PPC_PMAC) += powermac/ diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 9f5958f16923..4b2f114f3116 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config PPC_CELL bool - default n config PPC_CELL_COMMON bool @@ -22,7 +21,6 @@ config PPC_CELL_NATIVE select IBM_EMAC_RGMII if IBM_EMAC select IBM_EMAC_ZMII if IBM_EMAC #test only select IBM_EMAC_TAH if IBM_EMAC #test only - default n config PPC_IBM_CELL_BLADE bool "IBM Cell Blade" @@ -54,7 +52,6 @@ config SPU_FS config SPU_BASE bool - default n select PPC_COPRO_BASE config CBE_RAS diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 5c409c98cca8..f7e36373f6e0 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -180,35 +180,22 @@ out: static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) { - struct of_phandle_args oirq; - int ret; int i; for (i=0; i < 3; i++) { - ret = of_irq_parse_one(np, i, &oirq); - if (ret) { - pr_debug("spu_new: failed to get irq %d\n", i); - goto err; - } - ret = -EINVAL; - pr_debug(" irq %d no 0x%x on %pOF\n", i, oirq.args[0], - oirq.np); - spu->irqs[i] = irq_create_of_mapping(&oirq); - if (!spu->irqs[i]) { - pr_debug("spu_new: failed to map it !\n"); + spu->irqs[i] = irq_of_parse_and_map(np, i); + if (!spu->irqs[i]) goto err; - } } return 0; err: - pr_debug("failed to map irq %x for spu %s\n", *oirq.args, - spu->name); + pr_debug("failed to map irq %x for spu %s\n", i, spu->name); for (; i >= 0; i--) { if (spu->irqs[i]) irq_dispose_mapping(spu->irqs[i]); } - return ret; + return -EINVAL; } static int spu_map_resource(struct spu *spu, int nr, @@ -295,8 +282,8 @@ static int __init of_enumerate_spus(int (*fn)(void *data)) for_each_node_by_type(node, "spe") { ret = fn(node); if (ret) { - printk(KERN_WARNING "%s: Error initializing %s\n", - __func__, node->name); + printk(KERN_WARNING "%s: Error initializing %pOFn\n", + __func__, node); of_node_put(node); break; } diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 403523c061ba..ecf703ee3a76 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -112,7 +112,7 @@ static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible) } error = of_address_to_resource(np, 0, &res); if (error) { - pr_err("no valid reg found for %s\n", np->name); + pr_err("no valid reg found for %pOFn\n", np); goto out_put; } diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig index 376d0be36b66..2601fac50354 100644 --- a/arch/powerpc/platforms/maple/Kconfig +++ b/arch/powerpc/platforms/maple/Kconfig @@ -13,7 +13,6 @@ config PPC_MAPLE select PPC_RTAS select MMIO_NVRAM select ATA_NONSTANDARD if ATA - default n help This option enables support for the Maple 970FX Evaluation Board. For more information, refer to <http://www.970eval.com> diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig index d458a791d35b..98e3bc22bebc 100644 --- a/arch/powerpc/platforms/pasemi/Kconfig +++ b/arch/powerpc/platforms/pasemi/Kconfig @@ -2,7 +2,6 @@ config PPC_PASEMI depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN bool "PA Semi SoC-based platforms" - default n select MPIC select PCI select PPC_UDBG_16550 diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index c80f72c370ae..53384eb42a76 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -576,7 +576,7 @@ int pasemi_dma_init(void) res.start = 0xfd800000; res.end = res.start + 0x1000; } - dma_status = __ioremap(res.start, resource_size(&res), 0); + dma_status = ioremap_cache(res.start, resource_size(&res)); pci_dev_put(iob_pdev); for (i = 0; i < MAX_TXCH; i++) diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index f2839eed0f89..923bfb340433 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS_bootx_init.o += -fPIC +CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector) ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE) endif obj-y += pic.o setup.o time.o feature.o pci.o \ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 4eb8cb38fc69..ed2f54b3f173 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1049,7 +1049,6 @@ core99_reset_cpu(struct device_node *node, long param, long value) unsigned long flags; struct macio_chip *macio; struct device_node *np; - struct device_node *cpus; const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, KL_GPIO_RESET_CPU1, KL_GPIO_RESET_CPU2, @@ -1059,10 +1058,7 @@ core99_reset_cpu(struct device_node *node, long param, long value) if (macio->type != macio_keylargo) return -ENODEV; - cpus = of_find_node_by_path("/cpus"); - if (cpus == NULL) - return -ENODEV; - for (np = cpus->child; np != NULL; np = np->sibling) { + for_each_of_cpu_node(np) { const u32 *num = of_get_property(np, "reg", NULL); const u32 *rst = of_get_property(np, "soft-reset", NULL); if (num == NULL || rst == NULL) @@ -1072,7 +1068,6 @@ core99_reset_cpu(struct device_node *node, long param, long value) break; } } - of_node_put(cpus); if (np == NULL || reset_io == 0) reset_io = dflt_reset_lines[param]; @@ -1504,16 +1499,12 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) unsigned long flags; struct macio_chip *macio; struct device_node *np; - struct device_node *cpus; macio = &macio_chips[0]; if (macio->type != macio_keylargo2 && macio->type != macio_shasta) return -ENODEV; - cpus = of_find_node_by_path("/cpus"); - if (cpus == NULL) - return -ENODEV; - for (np = cpus->child; np != NULL; np = np->sibling) { + for_each_of_cpu_node(np) { const u32 *num = of_get_property(np, "reg", NULL); const u32 *rst = of_get_property(np, "soft-reset", NULL); if (num == NULL || rst == NULL) @@ -1523,7 +1514,6 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) break; } } - of_node_put(cpus); if (np == NULL || reset_io == 0) return -ENODEV; @@ -2515,31 +2505,26 @@ found: * supposed to be set when not supported, but I'm not very confident * that all Apple OF revs did it properly, I do it the paranoid way. */ - while (uninorth_base && uninorth_rev > 3) { - struct device_node *cpus = of_find_node_by_path("/cpus"); + if (uninorth_base && uninorth_rev > 3) { struct device_node *np; - if (!cpus || !cpus->child) { - printk(KERN_WARNING "Can't find CPU(s) in device tree !\n"); - of_node_put(cpus); - break; - } - np = cpus->child; - /* Nap mode not supported on SMP */ - if (np->sibling) { - of_node_put(cpus); - break; - } - /* Nap mode not supported if flush-on-lock property is present */ - if (of_get_property(np, "flush-on-lock", NULL)) { - of_node_put(cpus); - break; + for_each_of_cpu_node(np) { + int cpu_count = 1; + + /* Nap mode not supported on SMP */ + if (of_get_property(np, "flush-on-lock", NULL) || + (cpu_count > 1)) { + powersave_nap = 0; + of_node_put(np); + break; + } + + cpu_count++; + powersave_nap = 1; } - of_node_put(cpus); - powersave_nap = 1; - printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n"); - break; } + if (powersave_nap) + printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n"); /* On CPUs that support it (750FX), lowspeed by default during * NAP mode diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 3a529fcdae97..2f00e3daafb0 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -243,10 +243,9 @@ static void __init l2cr_init(void) { /* Checks "l2cr-value" property in the registry */ if (cpu_has_feature(CPU_FTR_L2CR)) { - struct device_node *np = of_find_node_by_name(NULL, "cpus"); - if (!np) - np = of_find_node_by_type(NULL, "cpu"); - if (np) { + struct device_node *np; + + for_each_of_cpu_node(np) { const unsigned int *l2cr = of_get_property(np, "l2cr-value", NULL); if (l2cr) { @@ -256,6 +255,7 @@ static void __init l2cr_init(void) _set_L2CR(ppc_override_l2cr_value); } of_node_put(np); + break; } } @@ -279,8 +279,8 @@ static void __init pmac_setup_arch(void) /* Set loops_per_jiffy to a half-way reasonable value, for use until calibrate_delay gets called. */ loops_per_jiffy = 50000000 / HZ; - cpu = of_find_node_by_type(NULL, "cpu"); - if (cpu != NULL) { + + for_each_of_cpu_node(cpu) { fp = of_get_property(cpu, "clock-frequency", NULL); if (fp != NULL) { if (pvr >= 0x30 && pvr < 0x80) @@ -292,8 +292,9 @@ static void __init pmac_setup_arch(void) else /* 601, 603, etc. */ loops_per_jiffy = *fp / (2 * HZ); + of_node_put(cpu); + break; } - of_node_put(cpu); } /* See if newworld or oldworld */ diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index f92c1918fb56..f157e3d071f2 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -45,13 +45,6 @@ #endif /* - * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU - * times wrap in 2040. If we need to handle later times, the read_time functions - * need to be changed to interpret wrapped times as post-2040. - */ -#define RTC_OFFSET 2082844800 - -/* * Calibrate the decrementer frequency with the VIA timer 1. */ #define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ @@ -90,98 +83,6 @@ long __init pmac_time_init(void) return delta; } -#ifdef CONFIG_ADB_CUDA -static time64_t cuda_get_time(void) -{ - struct adb_request req; - time64_t now; - - if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) - return 0; - while (!req.complete) - cuda_poll(); - if (req.reply_len != 7) - printk(KERN_ERR "cuda_get_time: got %d byte reply\n", - req.reply_len); - now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) + - (req.reply[5] << 8) + req.reply[6]); - /* it's either after year 2040, or the RTC has gone backwards */ - WARN_ON(now < RTC_OFFSET); - - return now - RTC_OFFSET; -} - -#define cuda_get_rtc_time(tm) rtc_time64_to_tm(cuda_get_time(), (tm)) - -static int cuda_set_rtc_time(struct rtc_time *tm) -{ - u32 nowtime; - struct adb_request req; - - nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET); - if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, - nowtime >> 24, nowtime >> 16, nowtime >> 8, - nowtime) < 0) - return -ENXIO; - while (!req.complete) - cuda_poll(); - if ((req.reply_len != 3) && (req.reply_len != 7)) - printk(KERN_ERR "cuda_set_rtc_time: got %d byte reply\n", - req.reply_len); - return 0; -} - -#else -#define cuda_get_time() 0 -#define cuda_get_rtc_time(tm) -#define cuda_set_rtc_time(tm) 0 -#endif - -#ifdef CONFIG_ADB_PMU -static time64_t pmu_get_time(void) -{ - struct adb_request req; - time64_t now; - - if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) - return 0; - pmu_wait_complete(&req); - if (req.reply_len != 4) - printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n", - req.reply_len); - now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16) + - (req.reply[2] << 8) + req.reply[3]); - - /* it's either after year 2040, or the RTC has gone backwards */ - WARN_ON(now < RTC_OFFSET); - - return now - RTC_OFFSET; -} - -#define pmu_get_rtc_time(tm) rtc_time64_to_tm(pmu_get_time(), (tm)) - -static int pmu_set_rtc_time(struct rtc_time *tm) -{ - u32 nowtime; - struct adb_request req; - - nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET); - if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24, - nowtime >> 16, nowtime >> 8, nowtime) < 0) - return -ENXIO; - pmu_wait_complete(&req); - if (req.reply_len != 0) - printk(KERN_ERR "pmu_set_rtc_time: %d byte reply from PMU\n", - req.reply_len); - return 0; -} - -#else -#define pmu_get_time() 0 -#define pmu_get_rtc_time(tm) -#define pmu_set_rtc_time(tm) 0 -#endif - #ifdef CONFIG_PMAC_SMU static time64_t smu_get_time(void) { @@ -191,11 +92,6 @@ static time64_t smu_get_time(void) return 0; return rtc_tm_to_time64(&tm); } - -#else -#define smu_get_time() 0 -#define smu_get_rtc_time(tm, spin) -#define smu_set_rtc_time(tm, spin) 0 #endif /* Can't be __init, it's called when suspending and resuming */ @@ -203,12 +99,18 @@ time64_t pmac_get_boot_time(void) { /* Get the time from the RTC, used only at boot time */ switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA case SYS_CTRLER_CUDA: return cuda_get_time(); +#endif +#ifdef CONFIG_ADB_PMU case SYS_CTRLER_PMU: return pmu_get_time(); +#endif +#ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: return smu_get_time(); +#endif default: return 0; } @@ -218,15 +120,21 @@ void pmac_get_rtc_time(struct rtc_time *tm) { /* Get the time from the RTC, used only at boot time */ switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA case SYS_CTRLER_CUDA: - cuda_get_rtc_time(tm); + rtc_time64_to_tm(cuda_get_time(), tm); break; +#endif +#ifdef CONFIG_ADB_PMU case SYS_CTRLER_PMU: - pmu_get_rtc_time(tm); + rtc_time64_to_tm(pmu_get_time(), tm); break; +#endif +#ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: smu_get_rtc_time(tm, 1); break; +#endif default: ; } @@ -235,12 +143,18 @@ void pmac_get_rtc_time(struct rtc_time *tm) int pmac_set_rtc_time(struct rtc_time *tm) { switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA case SYS_CTRLER_CUDA: return cuda_set_rtc_time(tm); +#endif +#ifdef CONFIG_ADB_PMU case SYS_CTRLER_PMU: return pmu_set_rtc_time(tm); +#endif +#ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: return smu_set_rtc_time(tm, 1); +#endif default: return -ENODEV; } diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index f8dc98d3dc01..99083fe992d5 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -15,11 +15,6 @@ config PPC_POWERNV select PPC_SCOM select ARCH_RANDOM select CPU_FREQ - select CPU_FREQ_GOV_PERFORMANCE - select CPU_FREQ_GOV_POWERSAVE - select CPU_FREQ_GOV_USERSPACE - select CPU_FREQ_GOV_ONDEMAND - select CPU_FREQ_GOV_CONSERVATIVE select PPC_DOORBELL select MMU_NOTIFIER select FORCE_SMP @@ -35,7 +30,6 @@ config OPAL_PRD config PPC_MEMTRACE bool "Enable removal of RAM from kernel mappings for tracing" depends on PPC_POWERNV && MEMORY_HOTREMOVE - default n help Enabling this option allows for the removal of memory (RAM) from the kernel mappings to be used for hardware tracing. diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 3c1beae29f2d..abc0be7507c8 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -223,14 +223,6 @@ int pnv_eeh_post_init(void) eeh_probe_devices(); eeh_addr_cache_build(); - if (eeh_has_flag(EEH_POSTPONED_PROBE)) { - eeh_clear_flag(EEH_POSTPONED_PROBE); - if (eeh_enabled()) - pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else - pr_info("EEH: No capable adapters found\n"); - } - /* Register OPAL event notifier */ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); if (eeh_event_irq < 0) { @@ -391,12 +383,6 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) return NULL; - /* Skip if we haven't probed yet */ - if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) { - eeh_add_flag(EEH_POSTPONED_PROBE); - return NULL; - } - /* Initialize eeh device */ edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; @@ -604,7 +590,7 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe) EEH_STATE_MMIO_ENABLED | EEH_STATE_DMA_ENABLED); } else if (!(pe->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); pnv_eeh_get_phb_diag(pe); if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) @@ -706,7 +692,7 @@ static int pnv_eeh_get_pe_state(struct eeh_pe *pe) if (phb->freeze_pe) phb->freeze_pe(phb, pe->addr); - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(pe); pnv_eeh_get_phb_diag(pe); if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) @@ -1054,7 +1040,7 @@ static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) int ret; /* The VF PE should have only one child device */ - edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); pdn = eeh_dev_to_pdn(edev); if (!pdn) return -ENXIO; @@ -1148,43 +1134,6 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } /** - * pnv_eeh_wait_state - Wait for PE state - * @pe: EEH PE - * @max_wait: maximal period in millisecond - * - * Wait for the state of associated PE. It might take some time - * to retrieve the PE's state. - */ -static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) -{ - int ret; - int mwait; - - while (1) { - ret = pnv_eeh_get_state(pe, &mwait); - - /* - * If the PE's state is temporarily unavailable, - * we have to wait for the specified time. Otherwise, - * the PE's state will be returned immediately. - */ - if (ret != EEH_STATE_UNAVAILABLE) - return ret; - - if (max_wait <= 0) { - pr_warn("%s: Timeout getting PE#%x's state (%d)\n", - __func__, pe->addr, max_wait); - return EEH_STATE_NOT_SUPPORT; - } - - max_wait -= mwait; - msleep(mwait); - } - - return EEH_STATE_NOT_SUPPORT; -} - -/** * pnv_eeh_get_log - Retrieve error log * @pe: EEH PE * @severity: temporary or permanent error log @@ -1611,7 +1560,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) if ((ret == EEH_NEXT_ERR_FROZEN_PE || ret == EEH_NEXT_ERR_FENCED_PHB) && !((*pe)->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(*pe); pnv_eeh_get_phb_diag(*pe); if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) @@ -1640,7 +1589,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) } /* We possibly migrate to another PE */ - eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + eeh_pe_mark_isolated(*pe); } /* @@ -1702,7 +1651,6 @@ static struct eeh_ops pnv_eeh_ops = { .get_pe_addr = pnv_eeh_get_pe_addr, .get_state = pnv_eeh_get_state, .reset = pnv_eeh_reset, - .wait_state = pnv_eeh_wait_state, .get_log = pnv_eeh_get_log, .configure_bridge = pnv_eeh_configure_bridge, .err_inject = pnv_eeh_err_inject, diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 51dc398ae3f7..a29fdf8a2e56 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -90,17 +90,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, change_memblock_state); - lock_device_hotplug(); - remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); - unlock_device_hotplug(); return true; } static u64 memtrace_alloc_node(u32 nid, u64 size) { - u64 start_pfn, end_pfn, nr_pages; + u64 start_pfn, end_pfn, nr_pages, pfn; u64 base_pfn; + u64 bytes = memory_block_size_bytes(); if (!node_spanned_pages(nid)) return 0; @@ -113,8 +111,21 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) end_pfn = round_down(end_pfn - nr_pages, nr_pages); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { - if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) + if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { + /* + * Remove memory in memory block size chunks so that + * iomem resources are always split to the same size and + * we never try to remove memory that spans two iomem + * resources. + */ + lock_device_hotplug(); + end_pfn = base_pfn + nr_pages; + for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { + remove_memory(nid, pfn << PAGE_SHIFT, bytes); + } + unlock_device_hotplug(); return base_pfn << PAGE_SHIFT; + } } return 0; diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 8006c54a91e3..6f60e0931922 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -17,7 +17,7 @@ #include <linux/pci.h> #include <linux/memblock.h> #include <linux/iommu.h> -#include <linux/debugfs.h> +#include <linux/sizes.h> #include <asm/debugfs.h> #include <asm/tlb.h> @@ -42,14 +42,6 @@ static DEFINE_SPINLOCK(npu_context_lock); /* - * When an address shootdown range exceeds this threshold we invalidate the - * entire TLB on the GPU for the given PID rather than each specific address in - * the range. - */ -static uint64_t atsd_threshold = 2 * 1024 * 1024; -static struct dentry *atsd_threshold_dentry; - -/* * Other types of TCE cache invalidation are not functional in the * hardware. */ @@ -454,79 +446,73 @@ static void put_mmio_atsd_reg(struct npu *npu, int reg) } /* MMIO ATSD register offsets */ -#define XTS_ATSD_AVA 1 -#define XTS_ATSD_STAT 2 - -static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, - unsigned long launch, unsigned long va) -{ - struct npu *npu = mmio_atsd_reg->npu; - int reg = mmio_atsd_reg->reg; - - __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); - eieio(); - __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]); -} +#define XTS_ATSD_LAUNCH 0 +#define XTS_ATSD_AVA 1 +#define XTS_ATSD_STAT 2 -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], - unsigned long pid, bool flush) +static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize) { - int i; - unsigned long launch; - - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; + unsigned long launch = 0; - /* IS set to invalidate matching PID */ - launch = PPC_BIT(12); - - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); + if (psize == MMU_PAGE_COUNT) { + /* IS set to invalidate entire matching PID */ + launch |= PPC_BIT(12); + } else { + /* AP set to invalidate region of psize */ + launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17); + } - /* AP */ - launch |= (u64) - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* PRS set to process-scoped */ + launch |= PPC_BIT(13); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */ - /* Invalidating the entire process doesn't use a va */ - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); - } + return launch; } -static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], - unsigned long va, unsigned long pid, bool flush) +static void mmio_atsd_regs_write(struct mmio_atsd_reg + mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset, + unsigned long val) { - int i; - unsigned long launch; + struct npu *npu; + int i, reg; for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) + reg = mmio_atsd_reg[i].reg; + if (reg < 0) continue; - /* IS set to invalidate target VA */ - launch = 0; + npu = mmio_atsd_reg[i].npu; + __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset); + } +} + +static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long pid) +{ + unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT); - /* PRS set to process scoped */ - launch |= PPC_BIT(13); + /* Invalidating the entire process doesn't use a va */ + mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch); +} - /* AP */ - launch |= (u64) - mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); +static void mmio_invalidate_range(struct mmio_atsd_reg + mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid, + unsigned long start, unsigned long psize) +{ + unsigned long launch = get_atsd_launch_val(pid, psize); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* Write all VAs first */ + mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* Issue one barrier for all address writes */ + eieio(); - mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); - } + /* Launch */ + mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch); } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) @@ -612,14 +598,36 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) } /* - * Invalidate either a single address or an entire PID depending on - * the value of va. + * Invalidate a virtual address range */ -static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address, bool flush) +static void mmio_invalidate(struct npu_context *npu_context, + unsigned long start, unsigned long size) { struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; + unsigned long atsd_start = 0; + unsigned long end = start + size - 1; + int atsd_psize = MMU_PAGE_COUNT; + + /* + * Convert the input range into one of the supported sizes. If the range + * doesn't fit, use the next larger supported size. Invalidation latency + * is high, so over-invalidation is preferred to issuing multiple + * invalidates. + * + * A 4K page size isn't supported by NPU/GPU ATS, so that case is + * ignored. + */ + if (size == SZ_64K) { + atsd_start = start; + atsd_psize = MMU_PAGE_64K; + } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) { + atsd_start = ALIGN_DOWN(start, SZ_2M); + atsd_psize = MMU_PAGE_2M; + } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) { + atsd_start = ALIGN_DOWN(start, SZ_1G); + atsd_psize = MMU_PAGE_1G; + } if (npu_context->nmmu_flush) /* @@ -634,23 +642,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, * an invalidate. */ acquire_atsd_reg(npu_context, mmio_atsd_reg); - if (va) - mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); + + if (atsd_psize == MMU_PAGE_COUNT) + mmio_invalidate_pid(mmio_atsd_reg, pid); else - mmio_invalidate_pid(mmio_atsd_reg, pid, flush); + mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start, + atsd_psize); mmio_invalidate_wait(mmio_atsd_reg); - if (flush) { - /* - * The GPU requires two flush ATSDs to ensure all entries have - * been flushed. We use PID 0 as it will never be used for a - * process on the GPU. - */ - mmio_invalidate_pid(mmio_atsd_reg, 0, true); - mmio_invalidate_wait(mmio_atsd_reg); - mmio_invalidate_pid(mmio_atsd_reg, 0, true); - mmio_invalidate_wait(mmio_atsd_reg); - } + + /* + * The GPU requires two flush ATSDs to ensure all entries have been + * flushed. We use PID 0 as it will never be used for a process on the + * GPU. + */ + mmio_invalidate_pid(mmio_atsd_reg, 0); + mmio_invalidate_wait(mmio_atsd_reg); + mmio_invalidate_pid(mmio_atsd_reg, 0); + mmio_invalidate_wait(mmio_atsd_reg); + release_atsd_reg(mmio_atsd_reg); } @@ -667,7 +677,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, * There should be no more translation requests for this PID, but we * need to ensure any entries for it are removed from the TLB. */ - mmio_invalidate(npu_context, 0, 0, true); + mmio_invalidate(npu_context, 0, ~0UL); } static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, @@ -676,8 +686,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, pte_t pte) { struct npu_context *npu_context = mn_to_npu_context(mn); - - mmio_invalidate(npu_context, 1, address, true); + mmio_invalidate(npu_context, address, PAGE_SIZE); } static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, @@ -685,21 +694,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, unsigned long start, unsigned long end) { struct npu_context *npu_context = mn_to_npu_context(mn); - unsigned long address; - - if (end - start > atsd_threshold) { - /* - * Just invalidate the entire PID if the address range is too - * large. - */ - mmio_invalidate(npu_context, 0, 0, true); - } else { - for (address = start; address < end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address, false); - - /* Do the flush only on the final addess == end */ - mmio_invalidate(npu_context, 1, address, true); - } + mmio_invalidate(npu_context, start, end - start); } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { @@ -962,11 +957,6 @@ int pnv_npu2_init(struct pnv_phb *phb) static int npu_index; uint64_t rc = 0; - if (!atsd_threshold_dentry) { - atsd_threshold_dentry = debugfs_create_x64("atsd_threshold", - 0600, powerpc_debugfs_root, &atsd_threshold); - } - phb->npu.nmmu_flush = of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush"); for_each_child_of_node(phb->hose->dn, dn) { diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index badb29bde93f..d90ee4fc2c6a 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -199,7 +199,7 @@ void __init opal_powercap_init(void) } j = 0; - pcaps[i].pg.name = node->name; + pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node); if (has_min) { powercap_add_attr(min, "powercap-min", &pcaps[i].pattrs[j]); @@ -237,6 +237,7 @@ out_pcaps_pattrs: while (--i >= 0) { kfree(pcaps[i].pattrs); kfree(pcaps[i].pg.attrs); + kfree(pcaps[i].pg.name); } kobject_put(powercap_kobj); out_pcaps: diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index f7d04b6a2d7a..179609220e6f 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -214,9 +214,9 @@ void __init opal_sensor_groups_init(void) } if (!of_property_read_u32(node, "ibm,chip-id", &chipid)) - sprintf(sgs[i].name, "%s%d", node->name, chipid); + sprintf(sgs[i].name, "%pOFn%d", node, chipid); else - sprintf(sgs[i].name, "%s", node->name); + sprintf(sgs[i].name, "%pOFn", node); sgs[i].sg.name = sgs[i].name; if (add_attr_group(ops, len, &sgs[i], sgid)) { diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 9aa87df114fd..916a4b7b1bb5 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -194,7 +194,7 @@ void __init opal_sys_param_init(void) count = of_property_count_strings(sysparam, "param-name"); if (count < 0) { pr_err("SYSPARAM: No string found of property param-name in " - "the node %s\n", sysparam->name); + "the node %pOFn\n", sysparam); goto out_param_buf; } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 38fe4087484a..a4641515956f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -535,7 +535,7 @@ static int opal_recover_mce(struct pt_regs *regs, return recovered; } -void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) +void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) { panic_flush_kmsg_start(); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index adddde023622..14befee4b3f1 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -219,17 +219,41 @@ static void pnv_prepare_going_down(void) static void __noreturn pnv_restart(char *cmd) { - long rc = OPAL_BUSY; + long rc; pnv_prepare_going_down(); - while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_cec_reboot(); - if (rc == OPAL_BUSY_EVENT) - opal_poll_events(NULL); + do { + if (!cmd) + rc = opal_cec_reboot(); + else if (strcmp(cmd, "full") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL); else + rc = OPAL_UNSUPPORTED; + + if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + /* Opal is busy wait for some time and retry */ + opal_poll_events(NULL); mdelay(10); - } + + } else if (cmd && rc) { + /* Unknown error while issuing reboot */ + if (rc == OPAL_UNSUPPORTED) + pr_err("Unsupported '%s' reboot.\n", cmd); + else + pr_err("Unable to issue '%s' reboot. Err=%ld\n", + cmd, rc); + pr_info("Forcing a cec-reboot\n"); + cmd = NULL; + rc = OPAL_BUSY; + + } else if (rc != OPAL_SUCCESS) { + /* Unknown error while issuing cec-reboot */ + pr_err("Unable to reboot. Err=%ld\n", rc); + } + + } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT); + for (;;) opal_poll_events(NULL); } @@ -437,6 +461,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) return ret_freq; } +static long pnv_machine_check_early(struct pt_regs *regs) +{ + long handled = 0; + + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + handled = cur_cpu_spec->machine_check_early(regs); + + return handled; +} + define_machine(powernv) { .name = "PowerNV", .probe = pnv_probe, @@ -448,6 +482,7 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = NULL, .calibrate_decr = generic_calibrate_decr, + .machine_check_early = pnv_machine_check_early, #ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = pnv_kexec_cpu_down, #endif diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index 6f7525555b19..24864b8aaf5d 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -49,7 +49,6 @@ config PS3_HTAB_SIZE config PS3_DYNAMIC_DMA depends on PPC_PS3 bool "PS3 Platform dynamic DMA page table management" - default n help This option will enable kernel support to take advantage of the per device dynamic DMA page table management provided by the Cell @@ -89,7 +88,6 @@ config PS3_SYS_MANAGER config PS3_REPOSITORY_WRITE bool "PS3 Repository write support" if PS3_ADVANCED depends on PPC_PS3 - default n help Enables support for writing to the PS3 System Repository. diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index cdbfc5cfd6f3..f5387ad82279 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -664,7 +664,7 @@ static int update_flash_db(void) db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff); count = os_area_flash_write(db, sizeof(struct os_area_db), pos); - if (count < sizeof(struct os_area_db)) { + if (count < 0 || count < sizeof(struct os_area_db)) { pr_debug("%s: os_area_flash_write failed %zd\n", __func__, count); error = count < 0 ? count : -EIO; diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index b54850845466..7746c2a3c509 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -215,8 +215,7 @@ static int __init setup_areas(struct spu *spu) goto fail_ioremap; } - spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys, - LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0)))); + spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE); if (!spu->local_store) { pr_debug("%s:%d: ioremap local_store failed\n", diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 0c698fd6d491..2e4bd32154b5 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -28,7 +28,6 @@ config PPC_PSERIES config PPC_SPLPAR depends on PPC_PSERIES bool "Support for shared-processor logical partitions" - default n help Enabling this option will make the kernel run more efficiently on logically-partitioned pSeries systems which use shared @@ -99,7 +98,6 @@ config PPC_SMLPAR bool "Support for shared-memory logical partitions" depends on PPC_PSERIES select LPARCFG - default n help Select this option to enable shared memory partition support. With this option a system running in an LPAR can be given more @@ -140,3 +138,10 @@ config IBMEBUS bool "Support for GX bus based adapters" help Bus device driver for GX bus based adapters. + +config PAPR_SCM + depends on PPC_PSERIES && MEMORY_HOTPLUG + select LIBNVDIMM + tristate "Support for the PAPR Storage Class Memory interface" + help + Enable access to hypervisor provided storage class memory. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 7e89d5c47068..a43ec843c8e2 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o -obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o +obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o @@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o +obj-$(CONFIG_PAPR_SCM) += papr_scm.o ifdef CONFIG_PPC_PSERIES obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a0b20c03f078..7625546caefd 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -32,8 +32,6 @@ static struct workqueue_struct *pseries_hp_wq; struct pseries_hp_work { struct work_struct work; struct pseries_hp_errorlog *errlog; - struct completion *hp_completion; - int *rc; }; struct cc_workarea { @@ -329,7 +327,7 @@ int dlpar_release_drc(u32 drc_index) return 0; } -static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) +int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) { int rc; @@ -357,6 +355,10 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) case PSERIES_HP_ELOG_RESOURCE_CPU: rc = dlpar_cpu(hp_elog); break; + case PSERIES_HP_ELOG_RESOURCE_PMEM: + rc = dlpar_hp_pmem(hp_elog); + break; + default: pr_warn_ratelimited("Invalid resource (%d) specified\n", hp_elog->resource); @@ -371,20 +373,13 @@ static void pseries_hp_work_fn(struct work_struct *work) struct pseries_hp_work *hp_work = container_of(work, struct pseries_hp_work, work); - if (hp_work->rc) - *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog); - else - handle_dlpar_errorlog(hp_work->errlog); - - if (hp_work->hp_completion) - complete(hp_work->hp_completion); + handle_dlpar_errorlog(hp_work->errlog); kfree(hp_work->errlog); kfree((void *)work); } -void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, - struct completion *hotplug_done, int *rc) +void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog) { struct pseries_hp_work *work; struct pseries_hp_errorlog *hp_errlog_copy; @@ -397,13 +392,9 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, if (work) { INIT_WORK((struct work_struct *)work, pseries_hp_work_fn); work->errlog = hp_errlog_copy; - work->hp_completion = hotplug_done; - work->rc = rc; queue_work(pseries_hp_wq, (struct work_struct *)work); } else { - *rc = -ENOMEM; kfree(hp_errlog_copy); - complete(hotplug_done); } } @@ -521,18 +512,15 @@ static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog) static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) { - struct pseries_hp_errorlog *hp_elog; - struct completion hotplug_done; + struct pseries_hp_errorlog hp_elog; char *argbuf; char *args; int rc; args = argbuf = kstrdup(buf, GFP_KERNEL); - hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); - if (!hp_elog || !argbuf) { + if (!argbuf) { pr_info("Could not allocate resources for DLPAR operation\n"); kfree(argbuf); - kfree(hp_elog); return -ENOMEM; } @@ -540,25 +528,22 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, * Parse out the request from the user, this will be in the form: * <resource> <action> <id_type> <id> */ - rc = dlpar_parse_resource(&args, hp_elog); + rc = dlpar_parse_resource(&args, &hp_elog); if (rc) goto dlpar_store_out; - rc = dlpar_parse_action(&args, hp_elog); + rc = dlpar_parse_action(&args, &hp_elog); if (rc) goto dlpar_store_out; - rc = dlpar_parse_id_type(&args, hp_elog); + rc = dlpar_parse_id_type(&args, &hp_elog); if (rc) goto dlpar_store_out; - init_completion(&hotplug_done); - queue_hotplug_event(hp_elog, &hotplug_done, &rc); - wait_for_completion(&hotplug_done); + rc = handle_dlpar_errorlog(&hp_elog); dlpar_store_out: kfree(argbuf); - kfree(hp_elog); if (rc) pr_err("Could not handle DLPAR request \"%s\"\n", buf); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 18014cdeb590..ef6595153642 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -149,7 +149,7 @@ static int dtl_start(struct dtl *dtl) /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ - ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; + ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES); hwcpu = get_hard_smp_processor_id(dtl->cpu); addr = __pa(dtl->buf); @@ -184,7 +184,7 @@ static void dtl_stop(struct dtl *dtl) static u64 dtl_current_index(struct dtl *dtl) { - return lppaca_of(dtl->cpu).dtl_idx; + return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 823cb27efa8b..c9e5ca4afb26 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -438,7 +438,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) /** * pseries_eeh_get_state - Retrieve PE state * @pe: EEH PE - * @state: return value + * @delay: suggested time to wait if state is unavailable * * Retrieve the state of the specified PE. On RTAS compliant * pseries platform, there already has one dedicated RTAS function @@ -448,7 +448,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) * RTAS calls for the purpose, we need to try the new one and back * to the old one if the new one couldn't work properly. */ -static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) +static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) { int config_addr; int ret; @@ -499,7 +499,8 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) break; case 5: if (rets[2]) { - if (state) *state = rets[2]; + if (delay) + *delay = rets[2]; result = EEH_STATE_UNAVAILABLE; } else { result = EEH_STATE_NOT_SUPPORT; @@ -554,64 +555,6 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option) } /** - * pseries_eeh_wait_state - Wait for PE state - * @pe: EEH PE - * @max_wait: maximal period in millisecond - * - * Wait for the state of associated PE. It might take some time - * to retrieve the PE's state. - */ -static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) -{ - int ret; - int mwait; - - /* - * According to PAPR, the state of PE might be temporarily - * unavailable. Under the circumstance, we have to wait - * for indicated time determined by firmware. The maximal - * wait time is 5 minutes, which is acquired from the original - * EEH implementation. Also, the original implementation - * also defined the minimal wait time as 1 second. - */ -#define EEH_STATE_MIN_WAIT_TIME (1000) -#define EEH_STATE_MAX_WAIT_TIME (300 * 1000) - - while (1) { - ret = pseries_eeh_get_state(pe, &mwait); - - /* - * If the PE's state is temporarily unavailable, - * we have to wait for the specified time. Otherwise, - * the PE's state will be returned immediately. - */ - if (ret != EEH_STATE_UNAVAILABLE) - return ret; - - if (max_wait <= 0) { - pr_warn("%s: Timeout when getting PE's state (%d)\n", - __func__, max_wait); - return EEH_STATE_NOT_SUPPORT; - } - - if (mwait <= 0) { - pr_warn("%s: Firmware returned bad wait value %d\n", - __func__, mwait); - mwait = EEH_STATE_MIN_WAIT_TIME; - } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { - pr_warn("%s: Firmware returned too long wait value %d\n", - __func__, mwait); - mwait = EEH_STATE_MAX_WAIT_TIME; - } - - max_wait -= mwait; - msleep(mwait); - } - - return EEH_STATE_NOT_SUPPORT; -} - -/** * pseries_eeh_get_log - Retrieve error log * @pe: EEH PE * @severity: temporary or permanent error log @@ -849,7 +792,6 @@ static struct eeh_ops pseries_eeh_ops = { .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, .reset = pseries_eeh_reset, - .wait_state = pseries_eeh_wait_state, .get_log = pseries_eeh_get_log, .configure_bridge = pseries_eeh_configure_bridge, .err_inject = NULL, diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c index 6eeb0d4bab61..446ef104fb3a 100644 --- a/arch/powerpc/platforms/pseries/event_sources.c +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -16,7 +16,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <asm/prom.h> +#include <linux/interrupt.h> +#include <linux/of_irq.h> #include "pseries.h" @@ -24,34 +25,19 @@ void request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name) { - int i, index, count = 0; - struct of_phandle_args oirq; - unsigned int virqs[16]; + int i, virq, rc; - /* First try to do a proper OF tree parsing */ - for (index = 0; of_irq_parse_one(np, index, &oirq) == 0; - index++) { - if (count > 15) - break; - virqs[count] = irq_create_of_mapping(&oirq); - if (!virqs[count]) { - pr_err("event-sources: Unable to allocate " - "interrupt number for %pOF\n", - np); - WARN_ON(1); - } else { - count++; - } - } + for (i = 0; i < 16; i++) { + virq = of_irq_get(np, i); + if (virq < 0) + return; + if (WARN(!virq, "event-sources: Unable to allocate " + "interrupt number for %pOF\n", np)) + continue; - /* Now request them */ - for (i = 0; i < count; i++) { - if (request_irq(virqs[i], handler, 0, name, NULL)) { - pr_err("event-sources: Unable to request interrupt " - "%d for %pOF\n", virqs[i], np); - WARN_ON(1); + rc = request_irq(virq, handler, 0, name, NULL); + if (WARN(rc, "event-sources: Unable to request interrupt %d for %pOF\n", + virq, np)) return; - } } } - diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index a3bbeb43689e..608ecad0178f 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -65,6 +65,8 @@ hypertas_fw_features_table[] = { {FW_FEATURE_SET_MODE, "hcall-set-mode"}, {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"}, {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"}, + {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, + {FW_FEATURE_PAPR_SCM, "hcall-scm"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 6ef77caf7bcf..2f8e62163602 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -287,7 +287,7 @@ static int pseries_add_processor(struct device_node *np) if (cpumask_empty(tmp)) { printk(KERN_ERR "Unable to find space in cpu_present_mask for" - " processor %s with %d thread(s)\n", np->name, + " processor %pOFn with %d thread(s)\n", np, nthreads); goto out_unlock; } @@ -481,8 +481,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index) if (rc) { saved_rc = rc; - pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n", - dn->name, rc, drc_index); + pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n", + dn, rc, drc_index); rc = dlpar_release_drc(drc_index); if (!rc) @@ -494,8 +494,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index) rc = dlpar_online_cpu(dn); if (rc) { saved_rc = rc; - pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n", - dn->name, rc, drc_index); + pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n", + dn, rc, drc_index); rc = dlpar_detach_node(dn); if (!rc) @@ -504,7 +504,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index) return saved_rc; } - pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name, + pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn, drc_index); return rc; } @@ -570,19 +570,19 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) { int rc; - pr_debug("Attempting to remove CPU %s, drc index: %x\n", - dn->name, drc_index); + pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n", + dn, drc_index); rc = dlpar_offline_cpu(dn); if (rc) { - pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc); + pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc); return -EINVAL; } rc = dlpar_release_drc(drc_index); if (rc) { - pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n", - drc_index, dn->name, rc); + pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n", + drc_index, dn, rc); dlpar_online_cpu(dn); return rc; } @@ -591,7 +591,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) if (rc) { int saved_rc = rc; - pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc); + pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc); rc = dlpar_acquire_drc(drc_index); if (!rc) @@ -662,8 +662,8 @@ static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove) rc = of_property_read_u32(dn, "ibm,my-drc-index", &cpu_drcs[cpus_found - 1]); if (rc) { - pr_warn("Error occurred getting drc-index for %s\n", - dn->name); + pr_warn("Error occurred getting drc-index for %pOFn\n", + dn); of_node_put(dn); return -1; } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index c1578f54c626..2b796da822c2 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -101,11 +101,12 @@ static struct property *dlpar_clone_property(struct property *prop, return new_prop; } -static u32 find_aa_index(struct device_node *dr_node, - struct property *ala_prop, const u32 *lmb_assoc) +static bool find_aa_index(struct device_node *dr_node, + struct property *ala_prop, + const u32 *lmb_assoc, u32 *aa_index) { - u32 *assoc_arrays; - u32 aa_index; + u32 *assoc_arrays, new_prop_size; + struct property *new_prop; int aa_arrays, aa_array_entries, aa_array_sz; int i, index; @@ -121,54 +122,48 @@ static u32 find_aa_index(struct device_node *dr_node, aa_array_entries = be32_to_cpu(assoc_arrays[1]); aa_array_sz = aa_array_entries * sizeof(u32); - aa_index = -1; for (i = 0; i < aa_arrays; i++) { index = (i * aa_array_entries) + 2; if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz)) continue; - aa_index = i; - break; + *aa_index = i; + return true; } - if (aa_index == -1) { - struct property *new_prop; - u32 new_prop_size; - - new_prop_size = ala_prop->length + aa_array_sz; - new_prop = dlpar_clone_property(ala_prop, new_prop_size); - if (!new_prop) - return -1; - - assoc_arrays = new_prop->value; + new_prop_size = ala_prop->length + aa_array_sz; + new_prop = dlpar_clone_property(ala_prop, new_prop_size); + if (!new_prop) + return false; - /* increment the number of entries in the lookup array */ - assoc_arrays[0] = cpu_to_be32(aa_arrays + 1); + assoc_arrays = new_prop->value; - /* copy the new associativity into the lookup array */ - index = aa_arrays * aa_array_entries + 2; - memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz); + /* increment the number of entries in the lookup array */ + assoc_arrays[0] = cpu_to_be32(aa_arrays + 1); - of_update_property(dr_node, new_prop); + /* copy the new associativity into the lookup array */ + index = aa_arrays * aa_array_entries + 2; + memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz); - /* - * The associativity lookup array index for this lmb is - * number of entries - 1 since we added its associativity - * to the end of the lookup array. - */ - aa_index = be32_to_cpu(assoc_arrays[0]) - 1; - } + of_update_property(dr_node, new_prop); - return aa_index; + /* + * The associativity lookup array index for this lmb is + * number of entries - 1 since we added its associativity + * to the end of the lookup array. + */ + *aa_index = be32_to_cpu(assoc_arrays[0]) - 1; + return true; } -static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb) +static int update_lmb_associativity_index(struct drmem_lmb *lmb) { struct device_node *parent, *lmb_node, *dr_node; struct property *ala_prop; const u32 *lmb_assoc; u32 aa_index; + bool found; parent = of_find_node_by_path("/"); if (!parent) @@ -200,46 +195,17 @@ static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb) return -ENODEV; } - aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc); + found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index); dlpar_free_cc_nodes(lmb_node); - return aa_index; -} -static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb) -{ - int rc, aa_index; - - lmb->flags |= DRCONF_MEM_ASSIGNED; - - aa_index = lookup_lmb_associativity_index(lmb); - if (aa_index < 0) { - pr_err("Couldn't find associativity index for drc index %x\n", - lmb->drc_index); - return aa_index; + if (!found) { + pr_err("Could not find LMB associativity\n"); + return -1; } lmb->aa_index = aa_index; - - rtas_hp_event = true; - rc = drmem_update_dt(); - rtas_hp_event = false; - - return rc; -} - -static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb) -{ - int rc; - - lmb->flags &= ~DRCONF_MEM_ASSIGNED; - lmb->aa_index = 0xffffffff; - - rtas_hp_event = true; - rc = drmem_update_dt(); - rtas_hp_event = false; - - return rc; + return 0; } static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) @@ -428,7 +394,9 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); - dlpar_remove_device_tree_lmb(lmb); + invalidate_lmb_associativity_index(lmb); + lmb->flags &= ~DRCONF_MEM_ASSIGNED; + return 0; } @@ -688,10 +656,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; - rc = dlpar_add_device_tree_lmb(lmb); + rc = update_lmb_associativity_index(lmb); if (rc) { - pr_err("Couldn't update device tree for drc index %x\n", - lmb->drc_index); dlpar_release_drc(lmb->drc_index); return rc; } @@ -704,14 +670,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) /* Add the memory */ rc = add_memory(nid, lmb->base_addr, block_sz); if (rc) { - dlpar_remove_device_tree_lmb(lmb); + invalidate_lmb_associativity_index(lmb); return rc; } rc = dlpar_online_lmb(lmb); if (rc) { remove_memory(nid, lmb->base_addr, block_sz); - dlpar_remove_device_tree_lmb(lmb); + invalidate_lmb_associativity_index(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } @@ -958,6 +924,12 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } + if (!rc) { + rtas_hp_event = true; + rc = drmem_update_dt(); + rtas_hp_event = false; + } + unlock_device_hotplug(); return rc; } diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index c7c1140c13b6..5b4a56131904 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -404,7 +404,7 @@ static ssize_t name_show(struct device *dev, struct platform_device *ofdev; ofdev = to_platform_device(dev); - return sprintf(buf, "%s\n", ofdev->dev.of_node->name); + return sprintf(buf, "%pOFn\n", ofdev->dev.of_node); } static DEVICE_ATTR_RO(name); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index d3992ced0782..32d4452973e7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -48,6 +48,7 @@ #include <asm/kexec.h> #include <asm/fadump.h> #include <asm/asm-prototypes.h> +#include <asm/debugfs.h> #include "pseries.h" @@ -417,6 +418,79 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, BUG_ON(lpar_rc != H_SUCCESS); } + +/* + * As defined in the PAPR's section 14.5.4.1.8 + * The control mask doesn't include the returned reference and change bit from + * the processed PTE. + */ +#define HBLKR_AVPN 0x0100000000000000UL +#define HBLKR_CTRL_MASK 0xf800000000000000UL +#define HBLKR_CTRL_SUCCESS 0x8000000000000000UL +#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL +#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL + +/** + * H_BLOCK_REMOVE caller. + * @idx should point to the latest @param entry set with a PTEX. + * If PTE cannot be processed because another CPUs has already locked that + * group, those entries are put back in @param starting at index 1. + * If entries has to be retried and @retry_busy is set to true, these entries + * are retried until success. If @retry_busy is set to false, the returned + * is the number of entries yet to process. + */ +static unsigned long call_block_remove(unsigned long idx, unsigned long *param, + bool retry_busy) +{ + unsigned long i, rc, new_idx; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + if (idx < 2) { + pr_warn("Unexpected empty call to H_BLOCK_REMOVE"); + return 0; + } +again: + new_idx = 0; + if (idx > PLPAR_HCALL9_BUFSIZE) { + pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx); + idx = PLPAR_HCALL9_BUFSIZE; + } else if (idx < PLPAR_HCALL9_BUFSIZE) + param[idx] = HBR_END; + + rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf, + param[0], /* AVA */ + param[1], param[2], param[3], param[4], /* TS0-7 */ + param[5], param[6], param[7], param[8]); + if (rc == H_SUCCESS) + return 0; + + BUG_ON(rc != H_PARTIAL); + + /* Check that the unprocessed entries were 'not found' or 'busy' */ + for (i = 0; i < idx-1; i++) { + unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK; + + if (ctrl == HBLKR_CTRL_ERRBUSY) { + param[++new_idx] = param[i+1]; + continue; + } + + BUG_ON(ctrl != HBLKR_CTRL_SUCCESS + && ctrl != HBLKR_CTRL_ERRNOTFOUND); + } + + /* + * If there were entries found busy, retry these entries if requested, + * of if all the entries have to be retried. + */ + if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) { + idx = new_idx + 1; + goto again; + } + + return new_idx; +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need @@ -424,17 +498,57 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, */ #define PPC64_HUGE_HPTE_BATCH 12 -static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, - unsigned long *vpn, int count, - int psize, int ssize) +static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn, + int count, int psize, int ssize) { unsigned long param[PLPAR_HCALL9_BUFSIZE]; - int i = 0, pix = 0, rc; - unsigned long flags = 0; - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + unsigned long shift, current_vpgb, vpgb; + int i, pix = 0; - if (lock_tlbie) - spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + shift = mmu_psize_defs[psize].shift; + + for (i = 0; i < count; i++) { + /* + * Shifting 3 bits more on the right to get a + * 8 pages aligned virtual addresse. + */ + vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3)); + if (!pix || vpgb != current_vpgb) { + /* + * Need to start a new 8 pages block, flush + * the current one if needed. + */ + if (pix) + (void)call_block_remove(pix, param, true); + current_vpgb = vpgb; + param[0] = hpte_encode_avpn(vpn[i], psize, ssize); + pix = 1; + } + + param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i]; + if (pix == PLPAR_HCALL9_BUFSIZE) { + pix = call_block_remove(pix, param, false); + /* + * pix = 0 means that all the entries were + * removed, we can start a new block. + * Otherwise, this means that there are entries + * to retry, and pix points to latest one, so + * we should increment it and try to continue + * the same block. + */ + if (pix) + pix++; + } + } + if (pix) + (void)call_block_remove(pix, param, true); +} + +static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn, + int count, int psize, int ssize) +{ + unsigned long param[PLPAR_HCALL9_BUFSIZE]; + int i = 0, pix = 0, rc; for (i = 0; i < count; i++) { @@ -462,6 +576,23 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, param[6], param[7]); BUG_ON(rc != H_SUCCESS); } +} + +static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, + unsigned long *vpn, + int count, int psize, + int ssize) +{ + unsigned long flags = 0; + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) + hugepage_block_invalidate(slot, vpn, count, psize, ssize); + else + hugepage_bulk_invalidate(slot, vpn, count, psize, ssize); if (lock_tlbie) spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); @@ -546,6 +677,86 @@ static int pSeries_lpar_hpte_removebolted(unsigned long ea, return 0; } + +static inline unsigned long compute_slot(real_pte_t pte, + unsigned long vpn, + unsigned long index, + unsigned long shift, + int ssize) +{ + unsigned long slot, hash, hidx; + + hash = hpt_hash(vpn, shift, ssize); + hidx = __rpte_to_hidx(pte, index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + return slot; +} + +/** + * The hcall H_BLOCK_REMOVE implies that the virtual pages to processed are + * "all within the same naturally aligned 8 page virtual address block". + */ +static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch, + unsigned long *param) +{ + unsigned long vpn; + unsigned long i, pix = 0; + unsigned long index, shift, slot, current_vpgb, vpgb; + real_pte_t pte; + int psize, ssize; + + psize = batch->psize; + ssize = batch->ssize; + + for (i = 0; i < number; i++) { + vpn = batch->vpn[i]; + pte = batch->pte[i]; + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + /* + * Shifting 3 bits more on the right to get a + * 8 pages aligned virtual addresse. + */ + vpgb = (vpn >> (shift - VPN_SHIFT + 3)); + if (!pix || vpgb != current_vpgb) { + /* + * Need to start a new 8 pages block, flush + * the current one if needed. + */ + if (pix) + (void)call_block_remove(pix, param, + true); + current_vpgb = vpgb; + param[0] = hpte_encode_avpn(vpn, psize, + ssize); + pix = 1; + } + + slot = compute_slot(pte, vpn, index, shift, ssize); + param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot; + + if (pix == PLPAR_HCALL9_BUFSIZE) { + pix = call_block_remove(pix, param, false); + /* + * pix = 0 means that all the entries were + * removed, we can start a new block. + * Otherwise, this means that there are entries + * to retry, and pix points to latest one, so + * we should increment it and try to continue + * the same block. + */ + if (pix) + pix++; + } + } pte_iterate_hashed_end(); + } + + if (pix) + (void)call_block_remove(pix, param, true); +} + /* * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie * lock. @@ -558,13 +769,18 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[PLPAR_HCALL9_BUFSIZE]; - unsigned long hash, index, shift, hidx, slot; + unsigned long index, shift, slot; real_pte_t pte; int psize, ssize; if (lock_tlbie) spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) { + do_block_remove(number, batch, param); + goto out; + } + psize = batch->psize; ssize = batch->ssize; pix = 0; @@ -572,12 +788,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) vpn = batch->vpn[i]; pte = batch->pte[i]; pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { - hash = hpt_hash(vpn, shift, ssize); - hidx = __rpte_to_hidx(pte, index); - if (hidx & _PTEIDX_SECONDARY) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += hidx & _PTEIDX_GROUP_IX; + slot = compute_slot(pte, vpn, index, shift, ssize); if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { /* * lpar doesn't use the passed actual page size @@ -608,6 +819,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) BUG_ON(rc != H_SUCCESS); } +out: if (lock_tlbie) spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); } @@ -1028,3 +1240,56 @@ static int __init reserve_vrma_context_id(void) return 0; } machine_device_initcall(pseries, reserve_vrma_context_id); + +#ifdef CONFIG_DEBUG_FS +/* debugfs file interface for vpa data */ +static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + int cpu = (long)filp->private_data; + struct lppaca *lppaca = &lppaca_of(cpu); + + return simple_read_from_buffer(buf, len, pos, lppaca, + sizeof(struct lppaca)); +} + +static const struct file_operations vpa_fops = { + .open = simple_open, + .read = vpa_file_read, + .llseek = default_llseek, +}; + +static int __init vpa_debugfs_init(void) +{ + char name[16]; + long i; + static struct dentry *vpa_dir; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); + if (!vpa_dir) { + pr_warn("%s: can't create vpa root dir\n", __func__); + return -ENOMEM; + } + + /* set up the per-cpu vpa file*/ + for_each_possible_cpu(i) { + struct dentry *d; + + sprintf(name, "cpu-%ld", i); + + d = debugfs_create_file(name, 0400, vpa_dir, (void *)i, + &vpa_fops); + if (!d) { + pr_warn("%s: can't create per-cpu vpa file\n", + __func__); + return -ENOMEM; + } + } + + return 0; +} +machine_arch_initcall(pseries, vpa_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 7c872dc01bdb..8bd590af488a 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -585,8 +585,7 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight) static ssize_t lparcfg_write(struct file *file, const char __user * buf, size_t count, loff_t * off) { - int kbuf_sz = 64; - char kbuf[kbuf_sz]; + char kbuf[64]; char *tmp; u64 new_entitled, *new_entitled_ptr = &new_entitled; u8 new_weight, *new_weight_ptr = &new_weight; @@ -595,7 +594,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf, if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return -EINVAL; - if (count > kbuf_sz) + if (count > sizeof(kbuf)) return -EINVAL; if (copy_from_user(kbuf, buf, count)) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f0e30dc94988..88925f8ca8a0 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -242,7 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) static void prrn_update_node(__be32 phandle) { - struct pseries_hp_errorlog *hp_elog; + struct pseries_hp_errorlog hp_elog; struct device_node *dn; /* @@ -255,18 +255,12 @@ static void prrn_update_node(__be32 phandle) return; } - hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL); - if(!hp_elog) - return; - - hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; - hp_elog->action = PSERIES_HP_ELOG_ACTION_READD; - hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; - hp_elog->_drc_u.drc_index = phandle; - - queue_hotplug_event(hp_elog, NULL, NULL); + hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM; + hp_elog.action = PSERIES_HP_ELOG_ACTION_READD; + hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; + hp_elog._drc_u.drc_index = phandle; - kfree(hp_elog); + handle_dlpar_errorlog(&hp_elog); } int pseries_devicetree_update(s32 scope) @@ -366,6 +360,8 @@ static ssize_t migration_store(struct class *class, if (rc) return rc; + stop_topology_update(); + do { rc = rtas_ibm_suspend_me(streamid); if (rc == -EAGAIN) @@ -376,6 +372,9 @@ static ssize_t migration_store(struct class *class, return rc; post_mobility_fixup(); + + start_topology_update(); + return count; } diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index b7496948129e..8011b4129e3a 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -203,7 +203,8 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) /* Get the top level device in the PE */ edev = pdn_to_eeh_dev(PCI_DN(dn)); if (edev->pe) - edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); + edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, + entry); dn = pci_device_to_OF_node(edev->pdev); if (!dn) return NULL; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c new file mode 100644 index 000000000000..ee9372b65ca5 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "papr-scm: " fmt + +#include <linux/of.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ioport.h> +#include <linux/slab.h> +#include <linux/ndctl.h> +#include <linux/sched.h> +#include <linux/libnvdimm.h> +#include <linux/platform_device.h> + +#include <asm/plpar_wrappers.h> + +#define BIND_ANY_ADDR (~0ul) + +#define PAPR_SCM_DIMM_CMD_MASK \ + ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ + (1ul << ND_CMD_GET_CONFIG_DATA) | \ + (1ul << ND_CMD_SET_CONFIG_DATA)) + +struct papr_scm_priv { + struct platform_device *pdev; + struct device_node *dn; + uint32_t drc_index; + uint64_t blocks; + uint64_t block_size; + int metadata_size; + + uint64_t bound_addr; + + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; + struct nvdimm *nvdimm; + struct resource res; + struct nd_region *region; + struct nd_interleave_set nd_set; +}; + +static int drc_pmem_bind(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + uint64_t rc, token; + + /* + * When the hypervisor cannot map all the requested memory in a single + * hcall it returns H_BUSY and we call again with the token until + * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS + * leave the system in an undefined state, so we wait. + */ + token = 0; + + do { + rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, + p->blocks, BIND_ANY_ADDR, token); + token = be64_to_cpu(ret[0]); + cond_resched(); + } while (rc == H_BUSY); + + if (rc) { + dev_err(&p->pdev->dev, "bind err: %lld\n", rc); + return -ENXIO; + } + + p->bound_addr = be64_to_cpu(ret[1]); + + dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); + + return 0; +} + +static int drc_pmem_unbind(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + uint64_t rc, token; + + token = 0; + + /* NB: unbind has the same retry requirements mentioned above */ + do { + rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, + p->bound_addr, p->blocks, token); + token = be64_to_cpu(ret); + cond_resched(); + } while (rc == H_BUSY); + + if (rc) + dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); + + return !!rc; +} + +static int papr_scm_meta_get(struct papr_scm_priv *p, + struct nd_cmd_get_config_data_hdr *hdr) +{ + unsigned long data[PLPAR_HCALL_BUFSIZE]; + int64_t ret; + + if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1) + return -EINVAL; + + ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, + hdr->in_offset, 1); + + if (ret == H_PARAMETER) /* bad DRC index */ + return -ENODEV; + if (ret) + return -EINVAL; /* other invalid parameter */ + + hdr->out_buf[0] = data[0] & 0xff; + + return 0; +} + +static int papr_scm_meta_set(struct papr_scm_priv *p, + struct nd_cmd_set_config_hdr *hdr) +{ + int64_t ret; + + if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1) + return -EINVAL; + + ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, + p->drc_index, hdr->in_offset, hdr->in_buf[0], 1); + + if (ret == H_PARAMETER) /* bad DRC index */ + return -ENODEV; + if (ret) + return -EINVAL; /* other invalid parameter */ + + return 0; +} + +int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, + unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) +{ + struct nd_cmd_get_config_size *get_size_hdr; + struct papr_scm_priv *p; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + p = nvdimm_provider_data(nvdimm); + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + get_size_hdr = buf; + + get_size_hdr->status = 0; + get_size_hdr->max_xfer = 1; + get_size_hdr->config_size = p->metadata_size; + *cmd_rc = 0; + break; + + case ND_CMD_GET_CONFIG_DATA: + *cmd_rc = papr_scm_meta_get(p, buf); + break; + + case ND_CMD_SET_CONFIG_DATA: + *cmd_rc = papr_scm_meta_set(p, buf); + break; + + default: + return -EINVAL; + } + + dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); + + return 0; +} + +static const struct attribute_group *region_attr_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + &nd_mapping_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static const struct attribute_group *bus_attr_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +static const struct attribute_group *papr_scm_dimm_groups[] = { + &nvdimm_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static int papr_scm_nvdimm_init(struct papr_scm_priv *p) +{ + struct device *dev = &p->pdev->dev; + struct nd_mapping_desc mapping; + struct nd_region_desc ndr_desc; + unsigned long dimm_flags; + + p->bus_desc.ndctl = papr_scm_ndctl; + p->bus_desc.module = THIS_MODULE; + p->bus_desc.of_node = p->pdev->dev.of_node; + p->bus_desc.attr_groups = bus_attr_groups; + p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); + + if (!p->bus_desc.provider_name) + return -ENOMEM; + + p->bus = nvdimm_bus_register(NULL, &p->bus_desc); + if (!p->bus) { + dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); + return -ENXIO; + } + + dimm_flags = 0; + set_bit(NDD_ALIASING, &dimm_flags); + + p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + if (!p->nvdimm) { + dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); + goto err; + } + + /* now add the region */ + + memset(&mapping, 0, sizeof(mapping)); + mapping.nvdimm = p->nvdimm; + mapping.start = 0; + mapping.size = p->blocks * p->block_size; // XXX: potential overflow? + + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.attr_groups = region_attr_groups; + ndr_desc.numa_node = dev_to_node(&p->pdev->dev); + ndr_desc.res = &p->res; + ndr_desc.of_node = p->dn; + ndr_desc.provider_data = p; + ndr_desc.mapping = &mapping; + ndr_desc.num_mappings = 1; + ndr_desc.nd_set = &p->nd_set; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + + p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); + if (!p->region) { + dev_err(dev, "Error registering region %pR from %pOF\n", + ndr_desc.res, p->dn); + goto err; + } + + return 0; + +err: nvdimm_bus_unregister(p->bus); + kfree(p->bus_desc.provider_name); + return -ENXIO; +} + +static int papr_scm_probe(struct platform_device *pdev) +{ + uint32_t drc_index, metadata_size, unit_cap[2]; + struct device_node *dn = pdev->dev.of_node; + struct papr_scm_priv *p; + int rc; + + /* check we have all the required DT properties */ + if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { + dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); + return -ENODEV; + } + + if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) { + dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn); + return -ENODEV; + } + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + /* optional DT properties */ + of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); + + p->dn = dn; + p->drc_index = drc_index; + p->block_size = unit_cap[0]; + p->blocks = unit_cap[1]; + + /* might be zero */ + p->metadata_size = metadata_size; + p->pdev = pdev; + + /* request the hypervisor to bind this region to somewhere in memory */ + rc = drc_pmem_bind(p); + if (rc) + goto err; + + /* setup the resource for the newly bound range */ + p->res.start = p->bound_addr; + p->res.end = p->bound_addr + p->blocks * p->block_size; + p->res.name = pdev->name; + p->res.flags = IORESOURCE_MEM; + + rc = papr_scm_nvdimm_init(p); + if (rc) + goto err2; + + platform_set_drvdata(pdev, p); + + return 0; + +err2: drc_pmem_unbind(p); +err: kfree(p); + return rc; +} + +static int papr_scm_remove(struct platform_device *pdev) +{ + struct papr_scm_priv *p = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(p->bus); + drc_pmem_unbind(p); + kfree(p); + + return 0; +} + +static const struct of_device_id papr_scm_match[] = { + { .compatible = "ibm,pmemory" }, + { }, +}; + +static struct platform_driver papr_scm_driver = { + .probe = papr_scm_probe, + .remove = papr_scm_remove, + .driver = { + .name = "papr_scm", + .owner = THIS_MODULE, + .of_match_table = papr_scm_match, + }, +}; + +module_platform_driver(papr_scm_driver); +MODULE_DEVICE_TABLE(of, papr_scm_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index eab96637d6cf..41d8a4d1d02e 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -239,6 +239,7 @@ void __init pSeries_final_fixup(void) { pSeries_request_regions(); + eeh_probe_devices(); eeh_addr_cache_build(); #ifdef CONFIG_PCI_IOV diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c new file mode 100644 index 000000000000..a27f40eb57b1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Handles hot and cold plug of persistent memory regions on pseries. + */ + +#define pr_fmt(fmt) "pseries-pmem: " fmt + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/sched.h> /* for idle_task_exit */ +#include <linux/sched/hotplug.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/firmware.h> +#include <asm/machdep.h> +#include <asm/vdso_datapage.h> +#include <asm/plpar_wrappers.h> +#include <asm/topology.h> + +#include "pseries.h" +#include "offline_states.h" + +static struct device_node *pmem_node; + +static ssize_t pmem_drc_add_node(u32 drc_index) +{ + struct device_node *dn; + int rc; + + pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index); + + rc = dlpar_acquire_drc(drc_index); + if (rc) { + pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n", + rc, drc_index); + return -EINVAL; + } + + dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node); + if (!dn) { + pr_err("configure-connector failed for drc %x\n", drc_index); + dlpar_release_drc(drc_index); + return -EINVAL; + } + + /* NB: The of reconfig notifier creates platform device from the node */ + rc = dlpar_attach_node(dn, pmem_node); + if (rc) { + pr_err("Failed to attach node %s, rc: %d, drc index: %x\n", + dn->name, rc, drc_index); + + if (dlpar_release_drc(drc_index)) + dlpar_free_cc_nodes(dn); + + return rc; + } + + pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index); + + return 0; +} + +static ssize_t pmem_drc_remove_node(u32 drc_index) +{ + struct device_node *dn; + uint32_t index; + int rc; + + for_each_child_of_node(pmem_node, dn) { + if (of_property_read_u32(dn, "ibm,my-drc-index", &index)) + continue; + if (index == drc_index) + break; + } + + if (!dn) { + pr_err("Attempting to remove unused DRC index %x\n", drc_index); + return -ENODEV; + } + + pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index); + + /* * NB: tears down the ibm,pmemory device as a side-effect */ + rc = dlpar_detach_node(dn); + if (rc) + return rc; + + rc = dlpar_release_drc(drc_index); + if (rc) { + pr_err("Failed to release drc (%x) for CPU %s, rc: %d\n", + drc_index, dn->name, rc); + dlpar_attach_node(dn, pmem_node); + return rc; + } + + pr_info("Successfully removed PMEM with drc index: %x\n", drc_index); + + return 0; +} + +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) +{ + u32 count, drc_index; + int rc; + + /* slim chance, but we might get a hotplug event while booting */ + if (!pmem_node) + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); + if (!pmem_node) { + pr_err("Hotplug event for a pmem device, but none exists\n"); + return -ENODEV; + } + + if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) { + pr_err("Unsupported hotplug event type %d\n", + hp_elog->id_type); + return -EINVAL; + } + + count = hp_elog->_drc_u.drc_count; + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + if (hp_elog->action == PSERIES_HP_ELOG_ACTION_ADD) { + rc = pmem_drc_add_node(drc_index); + } else if (hp_elog->action == PSERIES_HP_ELOG_ACTION_REMOVE) { + rc = pmem_drc_remove_node(drc_index); + } else { + pr_err("Unsupported hotplug action (%d)\n", hp_elog->action); + rc = -EINVAL; + } + + unlock_device_hotplug(); + return rc; +} + +const struct of_device_id drc_pmem_match[] = { + { .type = "ibm,persistent-memory", }, + {} +}; + +static int pseries_pmem_init(void) +{ + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); + if (!pmem_node) + return 0; + + /* + * The generic OF bus probe/populate handles creating platform devices + * from the child (ibm,pmemory) nodes. The generic code registers an of + * reconfig notifier to handle the hot-add/remove cases too. + */ + of_platform_bus_probe(pmem_node, drc_pmem_match, NULL); + + return 0; +} +machine_arch_initcall(pseries, pseries_pmem_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 60db2ee511fb..7dee8c5d3363 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -24,6 +24,7 @@ struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); +extern long pseries_machine_check_realmode(struct pt_regs *regs); #ifdef CONFIG_SMP extern void smp_init_pseries(void); @@ -59,15 +60,21 @@ extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); -void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, - struct completion *hotplug_done, int *rc); +void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); +int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); + #ifdef CONFIG_MEMORY_HOTPLUG int dlpar_memory(struct pseries_hp_errorlog *hp_elog); +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog); #else static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) { return -EOPNOTSUPP; } +static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} #endif #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 851ce326874a..d97d52772789 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -27,6 +27,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include <asm/firmware.h> +#include <asm/mce.h> #include "pseries.h" @@ -50,6 +51,101 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); +/* RTAS pseries MCE errorlog section. */ +struct pseries_mc_errorlog { + __be32 fru_id; + __be32 proc_id; + u8 error_type; + /* + * sub_err_type (1 byte). Bit fields depends on error_type + * + * MSB0 + * | + * V + * 01234567 + * XXXXXXXX + * + * For error_type == MC_ERROR_TYPE_UE + * XXXXXXXX + * X 1: Permanent or Transient UE. + * X 1: Effective address provided. + * X 1: Logical address provided. + * XX 2: Reserved. + * XXX 3: Type of UE error. + * + * For error_type != MC_ERROR_TYPE_UE + * XXXXXXXX + * X 1: Effective address provided. + * XXXXX 5: Reserved. + * XX 2: Type of SLB/ERAT/TLB error. + */ + u8 sub_err_type; + u8 reserved_1[6]; + __be64 effective_address; + __be64 logical_address; +} __packed; + +/* RTAS pseries MCE error types */ +#define MC_ERROR_TYPE_UE 0x00 +#define MC_ERROR_TYPE_SLB 0x01 +#define MC_ERROR_TYPE_ERAT 0x02 +#define MC_ERROR_TYPE_TLB 0x04 +#define MC_ERROR_TYPE_D_CACHE 0x05 +#define MC_ERROR_TYPE_I_CACHE 0x07 + +/* RTAS pseries MCE error sub types */ +#define MC_ERROR_UE_INDETERMINATE 0 +#define MC_ERROR_UE_IFETCH 1 +#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 +#define MC_ERROR_UE_LOAD_STORE 3 +#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 + +#define MC_ERROR_SLB_PARITY 0 +#define MC_ERROR_SLB_MULTIHIT 1 +#define MC_ERROR_SLB_INDETERMINATE 2 + +#define MC_ERROR_ERAT_PARITY 1 +#define MC_ERROR_ERAT_MULTIHIT 2 +#define MC_ERROR_ERAT_INDETERMINATE 3 + +#define MC_ERROR_TLB_PARITY 1 +#define MC_ERROR_TLB_MULTIHIT 2 +#define MC_ERROR_TLB_INDETERMINATE 3 + +static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) +{ + switch (mlog->error_type) { + case MC_ERROR_TYPE_UE: + return (mlog->sub_err_type & 0x07); + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + case MC_ERROR_TYPE_TLB: + return (mlog->sub_err_type & 0x03); + default: + return 0; + } +} + +static +inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog) +{ + __be64 addr = 0; + + switch (mlog->error_type) { + case MC_ERROR_TYPE_UE: + if (mlog->sub_err_type & 0x40) + addr = mlog->effective_address; + break; + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + case MC_ERROR_TYPE_TLB: + if (mlog->sub_err_type & 0x80) + addr = mlog->effective_address; + default: + break; + } + return be64_to_cpu(addr); +} /* * Enable the hotplug interrupt late because processing them may touch other @@ -237,8 +333,9 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) * hotplug events on the ras_log_buf to be handled by rtas_errd. */ if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || - hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU) - queue_hotplug_event(hp_elog, NULL, NULL); + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) + queue_hotplug_event(hp_elog); else log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); @@ -427,6 +524,188 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +#define VAL_TO_STRING(ar, val) \ + (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown") + +static void pseries_print_mce_info(struct pt_regs *regs, + struct rtas_error_log *errp) +{ + const char *level, *sevstr; + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log; + u8 error_type, err_sub_type; + u64 addr; + u8 initiator = rtas_error_initiator(errp); + int disposition = rtas_error_disposition(errp); + + static const char * const initiators[] = { + "Unknown", + "CPU", + "PCI", + "ISA", + "Memory", + "Power Mgmt", + }; + static const char * const mc_err_types[] = { + "UE", + "SLB", + "ERAT", + "TLB", + "D-Cache", + "Unknown", + "I-Cache", + }; + static const char * const mc_ue_types[] = { + "Indeterminate", + "Instruction fetch", + "Page table walk ifetch", + "Load/Store", + "Page table walk Load/Store", + }; + + /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ + static const char * const mc_slb_types[] = { + "Parity", + "Multihit", + "Indeterminate", + }; + + /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ + static const char * const mc_soft_types[] = { + "Unknown", + "Parity", + "Multihit", + "Indeterminate", + }; + + if (!rtas_error_extended(errp)) { + pr_err("Machine check interrupt: Missing extended error log\n"); + return; + } + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (pseries_log == NULL) + return; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + + switch (rtas_error_severity(errp)) { + case RTAS_SEVERITY_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case RTAS_SEVERITY_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case RTAS_SEVERITY_ERROR: + case RTAS_SEVERITY_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case RTAS_SEVERITY_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Display faulty slb contents for SLB errors. */ + if (error_type == MC_ERROR_TYPE_SLB) + slb_dump_contents(local_paca->mce_faulty_slbs); +#endif + + printk("%s%s Machine check interrupt [%s]\n", level, sevstr, + disposition == RTAS_DISP_FULLY_RECOVERED ? + "Recovered" : "Not recovered"); + if (user_mode(regs)) { + printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level, + regs->nip, current->pid, current->comm); + } else { + printk("%s NIP [%016lx]: %pS\n", level, regs->nip, + (void *)regs->nip); + } + printk("%s Initiator: %s\n", level, + VAL_TO_STRING(initiators, initiator)); + + switch (error_type) { + case MC_ERROR_TYPE_UE: + printk("%s Error type: %s [%s]\n", level, + VAL_TO_STRING(mc_err_types, error_type), + VAL_TO_STRING(mc_ue_types, err_sub_type)); + break; + case MC_ERROR_TYPE_SLB: + printk("%s Error type: %s [%s]\n", level, + VAL_TO_STRING(mc_err_types, error_type), + VAL_TO_STRING(mc_slb_types, err_sub_type)); + break; + case MC_ERROR_TYPE_ERAT: + case MC_ERROR_TYPE_TLB: + printk("%s Error type: %s [%s]\n", level, + VAL_TO_STRING(mc_err_types, error_type), + VAL_TO_STRING(mc_soft_types, err_sub_type)); + break; + default: + printk("%s Error type: %s\n", level, + VAL_TO_STRING(mc_err_types, error_type)); + break; + } + + addr = rtas_mc_get_effective_addr(mce_log); + if (addr) + printk("%s Effective address: %016llx\n", level, addr); +} + +static int mce_handle_error(struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (pseries_log == NULL) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; + rtas_set_disposition_recovered(errp); + break; + default: + break; + } + } +#endif + +out: + return disposition; +} + /* * Process MCE rtas errlog event. */ @@ -452,8 +731,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) int recovered = 0; int disposition = rtas_error_disposition(err); + pseries_print_mce_info(regs, err); + if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; } else if (disposition == RTAS_DISP_FULLY_RECOVERED) { @@ -503,11 +785,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs) struct rtas_error_log *errp; if (fwnmi_active) { - errp = fwnmi_get_errinfo(regs); fwnmi_release_errinfo(); + errp = fwnmi_get_errlog(); if (errp && recover_mce(regs, errp)) return 1; } return 0; } + +long pseries_machine_check_realmode(struct pt_regs *regs) +{ + struct rtas_error_log *errp; + int disposition; + + if (fwnmi_active) { + errp = fwnmi_get_errinfo(regs); + /* + * Call to fwnmi_release_errinfo() in real mode causes kernel + * to panic. Hence we will call it as soon as we go into + * virtual mode. + */ + disposition = mce_handle_error(errp); + if (disposition == RTAS_DISP_FULLY_RECOVERED) + return 1; + } + + return 0; +} diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ba1791fd3234..0f553dcfa548 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -107,6 +107,10 @@ static void __init fwnmi_init(void) u8 *mce_data_buf; unsigned int i; int nr_cpus = num_possible_cpus(); +#ifdef CONFIG_PPC_BOOK3S_64 + struct slb_entry *slb_ptr; + size_t size; +#endif int ibm_nmi_register = rtas_token("ibm,nmi-register"); if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) @@ -132,6 +136,15 @@ static void __init fwnmi_init(void) paca_ptrs[i]->mce_data_buf = mce_data_buf + (RTAS_ERROR_LOG_MAX * i); } + +#ifdef CONFIG_PPC_BOOK3S_64 + /* Allocate per cpu slb area to save old slb contents during MCE */ + size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; + slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry), + ppc64_rma_size)); + for_each_possible_cpu(i) + paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i); +#endif } static void pseries_8259_cascade(struct irq_desc *desc) @@ -1017,6 +1030,7 @@ define_machine(pseries) { .calibrate_decr = generic_calibrate_decr, .progress = rtas_progress, .system_reset_exception = pSeries_system_reset_exception, + .machine_check_early = pseries_machine_check_realmode, .machine_check_exception = pSeries_machine_check_exception, #ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 49e04ec19238..88f1ad1d6309 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1349,7 +1349,6 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) struct device_node *parent_node; const __be32 *prop; enum vio_dev_family family; - const char *of_node_name = of_node->name ? of_node->name : "<unknown>"; /* * Determine if this node is a under the /vdevice node or under the @@ -1362,24 +1361,24 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) else if (!strcmp(parent_node->type, "vdevice")) family = VDEVICE; else { - pr_warn("%s: parent(%pOF) of %s not recognized.\n", + pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n", __func__, parent_node, - of_node_name); + of_node); of_node_put(parent_node); return NULL; } of_node_put(parent_node); } else { - pr_warn("%s: could not determine the parent of node %s.\n", - __func__, of_node_name); + pr_warn("%s: could not determine the parent of node %pOFn.\n", + __func__, of_node); return NULL; } if (family == PFO) { if (of_get_property(of_node, "interrupt-controller", NULL)) { - pr_debug("%s: Skipping the interrupt controller %s.\n", - __func__, of_node_name); + pr_debug("%s: Skipping the interrupt controller %pOFn.\n", + __func__, of_node); return NULL; } } @@ -1399,15 +1398,15 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (of_node->type != NULL) viodev->type = of_node->type; else { - pr_warn("%s: node %s is missing the 'device_type' " - "property.\n", __func__, of_node_name); + pr_warn("%s: node %pOFn is missing the 'device_type' " + "property.\n", __func__, of_node); goto out; } prop = of_get_property(of_node, "reg", NULL); if (prop == NULL) { - pr_warn("%s: node %s missing 'reg'\n", - __func__, of_node_name); + pr_warn("%s: node %pOFn missing 'reg'\n", + __func__, of_node); goto out; } unit_address = of_read_number(prop, 1); @@ -1422,8 +1421,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (prop != NULL) viodev->resource_id = of_read_number(prop, 1); - dev_set_name(&viodev->dev, "%s", of_node_name); - viodev->type = of_node_name; + dev_set_name(&viodev->dev, "%pOFn", of_node); + viodev->type = dev_name(&viodev->dev); viodev->irq = 0; } @@ -1694,7 +1693,7 @@ struct vio_dev *vio_find_node(struct device_node *vnode) snprintf(kobj_name, sizeof(kobj_name), "%x", (uint32_t)of_read_number(prop, 1)); } else if (!strcmp(dev_type, "ibm,platform-facilities")) - snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name); + snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode); else return NULL; diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index bcef2ac56479..e0dbec780fe9 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -6,19 +6,16 @@ config PPC4xx_PCI_EXPRESS bool depends on PCI && 4xx - default n config PPC4xx_HSTA_MSI bool depends on PCI_MSI depends on PCI && 4xx - default n config PPC4xx_MSI bool depends on PCI_MSI depends on PCI && 4xx - default n config PPC_MSI_BITMAP bool @@ -37,11 +34,9 @@ config PPC_SCOM config SCOM_DEBUGFS bool "Expose SCOM controllers via debugfs" depends on PPC_SCOM && DEBUG_FS - default n config GE_FPGA bool - default n config FSL_CORENET_RCPM bool diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index f730539074c4..2caa4defdfb6 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) @@ -56,8 +55,6 @@ obj-$(CONFIG_PPC_SCOM) += scom.o obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror - obj-$(CONFIG_PPC_XICS) += xics/ obj-$(CONFIG_PPC_XIVE) += xive/ diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c index 00ccf3e4fcb4..15cbdd4fde06 100644 --- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c +++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c @@ -107,11 +107,11 @@ int __init instantiate_cache_sram(struct platform_device *dev, goto out_free; } - cache_sram->base_virt = ioremap_prot(cache_sram->base_phys, - cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL); + cache_sram->base_virt = ioremap_coherent(cache_sram->base_phys, + cache_sram->size); if (!cache_sram->base_virt) { - dev_err(&dev->dev, "%pOF: ioremap_prot failed\n", - dev->dev.of_node); + dev_err(&dev->dev, "%pOF: ioremap_coherent failed\n", + dev->dev.of_node); ret = -ENOMEM; goto out_release; } diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 535cf1f6941c..6300123ce965 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -846,7 +846,7 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq) u32 ipic_get_mcp_status(void) { - return ipic_read(primary_ipic->regs, IPIC_SERSR); + return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0; } void ipic_clear_mcp_status(u32 mask) diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile index 5d438d92472b..ba1e3117b1c0 100644 --- a/arch/powerpc/sysdev/xics/Makefile +++ b/arch/powerpc/sysdev/xics/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-y += xics-common.o obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig index 70ee976e1de0..785c292d104b 100644 --- a/arch/powerpc/sysdev/xive/Kconfig +++ b/arch/powerpc/sysdev/xive/Kconfig @@ -1,17 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 config PPC_XIVE bool - default n select PPC_SMP_MUXED_IPI select HARDIRQS_SW_RESEND config PPC_XIVE_NATIVE bool - default n select PPC_XIVE depends on PPC_POWERNV config PPC_XIVE_SPAPR bool - default n select PPC_XIVE diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile index 536d6e5706e3..dea2abc23f4d 100644 --- a/arch/powerpc/sysdev/xive/Makefile +++ b/arch/powerpc/sysdev/xive/Makefile @@ -1,4 +1,3 @@ -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror obj-y += common.o obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 959a2a62f233..9824074ec1b5 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1010,12 +1010,13 @@ static void xive_ipi_eoi(struct irq_data *d) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); - DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", - d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - /* Handle possible race with unplug and drop stale IPIs */ if (!xc) return; + + DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", + d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); + xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); xive_do_queue_eoi(xc); } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 5b20a678d755..1ca127d052a6 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -238,20 +238,11 @@ static bool xive_native_match(struct device_node *node) #ifdef CONFIG_SMP static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc) { - struct device_node *np; - unsigned int chip_id; s64 irq; - /* Find the chip ID */ - np = of_get_cpu_node(cpu, NULL); - if (np) { - if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0) - chip_id = 0; - } - /* Allocate an IPI and populate info about it */ for (;;) { - irq = opal_xive_allocate_irq(chip_id); + irq = opal_xive_allocate_irq(xc->chip_id); if (irq == OPAL_BUSY) { msleep(OPAL_BUSY_DELAY_MS); continue; diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 1bc3abb237cd..69e7fb47bcaa 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,14 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror +# Disable clang warning for using setjmp without setjmp.h header +subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) GCOV_PROFILE := n UBSAN_SANITIZE := n # Disable ftrace for the entire directory ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))) +KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 4264aedc7775..36b8dc47a3c3 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2378,25 +2378,33 @@ static void dump_one_paca(int cpu) DUMP(p, cpu_start, "%#-*x"); DUMP(p, kexec_state, "%#-*x"); #ifdef CONFIG_PPC_BOOK3S_64 - for (i = 0; i < SLB_NUM_BOLTED; i++) { - u64 esid, vsid; + if (!early_radix_enabled()) { + for (i = 0; i < SLB_NUM_BOLTED; i++) { + u64 esid, vsid; - if (!p->slb_shadow_ptr) - continue; + if (!p->slb_shadow_ptr) + continue; + + esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid); + vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid); - esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid); - vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid); + if (esid || vsid) { + printf(" %-*s[%d] = 0x%016llx 0x%016llx\n", + 22, "slb_shadow", i, esid, vsid); + } + } + DUMP(p, vmalloc_sllp, "%#-*x"); + DUMP(p, stab_rr, "%#-*x"); + DUMP(p, slb_used_bitmap, "%#-*x"); + DUMP(p, slb_kern_bitmap, "%#-*x"); - if (esid || vsid) { - printf(" %-*s[%d] = 0x%016llx 0x%016llx\n", - 22, "slb_shadow", i, esid, vsid); + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + DUMP(p, slb_cache_ptr, "%#-*x"); + for (i = 0; i < SLB_CACHE_ENTRIES; i++) + printf(" %-*s[%d] = 0x%016x\n", + 22, "slb_cache", i, p->slb_cache[i]); } } - DUMP(p, vmalloc_sllp, "%#-*x"); - DUMP(p, slb_cache_ptr, "%#-*x"); - for (i = 0; i < SLB_CACHE_ENTRIES; i++) - printf(" %-*s[%d] = 0x%016x\n", - 22, "slb_cache", i, p->slb_cache[i]); DUMP(p, rfi_flush_fallback_area, "%-*px"); #endif @@ -2412,7 +2420,9 @@ static void dump_one_paca(int cpu) DUMP(p, __current, "%-*px"); DUMP(p, kstack, "%#-*llx"); printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1)); - DUMP(p, stab_rr, "%#-*llx"); +#ifdef CONFIG_STACKPROTECTOR + DUMP(p, canary, "%#-*lx"); +#endif DUMP(p, saved_r1, "%#-*llx"); DUMP(p, trap_save, "%#-*x"); DUMP(p, irq_soft_mask, "%#-*x"); @@ -2444,11 +2454,15 @@ static void dump_one_paca(int cpu) DUMP(p, accounting.utime, "%#-*lx"); DUMP(p, accounting.stime, "%#-*lx"); +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME DUMP(p, accounting.utime_scaled, "%#-*lx"); +#endif DUMP(p, accounting.starttime, "%#-*lx"); DUMP(p, accounting.starttime_user, "%#-*lx"); +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME DUMP(p, accounting.startspurr, "%#-*lx"); DUMP(p, accounting.utime_sspurr, "%#-*lx"); +#endif DUMP(p, accounting.steal_time, "%#-*lx"); #undef DUMP @@ -2988,15 +3002,17 @@ static void show_task(struct task_struct *tsk) #ifdef CONFIG_PPC_BOOK3S_64 void format_pte(void *ptep, unsigned long pte) { + pte_t entry = __pte(pte); + printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte); printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK); printf("Flags = %s%s%s%s%s\n", - (pte & _PAGE_ACCESSED) ? "Accessed " : "", - (pte & _PAGE_DIRTY) ? "Dirty " : "", - (pte & _PAGE_READ) ? "Read " : "", - (pte & _PAGE_WRITE) ? "Write " : "", - (pte & _PAGE_EXEC) ? "Exec " : ""); + pte_young(entry) ? "Accessed " : "", + pte_dirty(entry) ? "Dirty " : "", + pte_read(entry) ? "Read " : "", + pte_write(entry) ? "Write " : "", + pte_exec(entry) ? "Exec " : ""); } static void show_pte(unsigned long addr) diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c index 26789ad28193..cde370cad4ae 100644 --- a/arch/sh/boards/of-generic.c +++ b/arch/sh/boards/of-generic.c @@ -64,7 +64,7 @@ static void sh_of_smp_probe(void) init_cpu_possible(cpumask_of(0)); - for_each_node_by_type(np, "cpu") { + for_each_of_cpu_node(np) { const __be32 *cell = of_get_property(np, "reg", NULL); u64 id = -1; if (cell) id = of_read_number(cell, of_n_addr_cells(np)); diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index f71ef3729888..316faa0130ba 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -52,7 +52,12 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long return val; } -#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg(ptr,x) \ +({ __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) void __xchg_called_with_bad_pointer(void); diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index d955c8df62d6..1902db27ff4b 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -24,9 +24,6 @@ #include <linux/atomic.h> #include <linux/irqdomain.h> -#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2 -#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1 - #define of_compat_cmp(s1, s2, l) strncmp((s1), (s2), (l)) #define of_prop_cmp(s1, s2) strcasecmp((s1), (s2)) #define of_node_cmp(s1, s2) strcmp((s1), (s2)) diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h index 4ff29b1406a9..b1d4e2e3210f 100644 --- a/arch/sparc/include/asm/switch_to_64.h +++ b/arch/sparc/include/asm/switch_to_64.h @@ -67,6 +67,7 @@ do { save_and_clear_fpu(); \ } while(0) void synchronize_user_stack(void); -void fault_in_user_windows(void); +struct pt_regs; +void fault_in_user_windows(struct pt_regs *); #endif /* __SPARC64_SWITCH_TO_64_H */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 6c086086ca8f..59eaf6227af1 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -36,6 +36,7 @@ #include <linux/sysrq.h> #include <linux/nmi.h> #include <linux/context_tracking.h> +#include <linux/signal.h> #include <linux/uaccess.h> #include <asm/page.h> @@ -521,7 +522,12 @@ static void stack_unaligned(unsigned long sp) force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current); } -void fault_in_user_windows(void) +static const char uwfault32[] = KERN_INFO \ + "%s[%d]: bad register window fault: SP %08lx (orig_sp %08lx) TPC %08lx O7 %08lx\n"; +static const char uwfault64[] = KERN_INFO \ + "%s[%d]: bad register window fault: SP %016lx (orig_sp %016lx) TPC %08lx O7 %016lx\n"; + +void fault_in_user_windows(struct pt_regs *regs) { struct thread_info *t = current_thread_info(); unsigned long window; @@ -534,9 +540,9 @@ void fault_in_user_windows(void) do { struct reg_window *rwin = &t->reg_window[window]; int winsize = sizeof(struct reg_window); - unsigned long sp; + unsigned long sp, orig_sp; - sp = t->rwbuf_stkptrs[window]; + orig_sp = sp = t->rwbuf_stkptrs[window]; if (test_thread_64bit_stack(sp)) sp += STACK_BIAS; @@ -547,8 +553,16 @@ void fault_in_user_windows(void) stack_unaligned(sp); if (unlikely(copy_to_user((char __user *)sp, - rwin, winsize))) + rwin, winsize))) { + if (show_unhandled_signals) + printk_ratelimited(is_compat_task() ? + uwfault32 : uwfault64, + current->comm, current->pid, + sp, orig_sp, + regs->tpc, + regs->u_regs[UREG_I7]); goto barf; + } } while (window--); } set_thread_wsaved(0); @@ -556,8 +570,7 @@ void fault_in_user_windows(void) barf: set_thread_wsaved(window + 1); - user_exit(); - do_exit(SIGILL); + force_sig(SIGSEGV, current); } asmlinkage long sparc_do_fork(unsigned long clone_flags, diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 4073e2b87dd0..29aa34f11720 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -39,6 +39,7 @@ __handle_preemption: wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate __handle_user_windows: + add %sp, PTREGS_OFF, %o0 call fault_in_user_windows 661: wrpr %g0, RTRAP_PSTATE, %pstate /* If userspace is using ADI, it could potentially pass diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 44d379db3f64..4c5b3fcbed94 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -371,7 +371,11 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + if (show_unhandled_signals) + pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n", + current->comm, current->pid, (unsigned long)sf, + regs->tpc, regs->u_regs[UREG_I7]); + force_sigsegv(ksig->sig, current); return -EINVAL; } @@ -501,7 +505,11 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, get_sigframe(ksig, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) { - do_exit(SIGILL); + if (show_unhandled_signals) + pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n", + current->comm, current->pid, (unsigned long)sf, + regs->tpc, regs->u_regs[UREG_I7]); + force_sigsegv(ksig->sig, current); return -EINVAL; } diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 48366e5eb5b2..e9de1803a22e 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -370,7 +370,11 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) get_sigframe(ksig, regs, sf_size); if (invalid_frame_pointer (sf)) { - do_exit(SIGILL); /* won't return, actually */ + if (show_unhandled_signals) + pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n", + current->comm, current->pid, (unsigned long)sf, + regs->tpc, regs->u_regs[UREG_I7]); + force_sigsegv(ksig->sig, current); return -EINVAL; } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f396048a0d68..39822f611c01 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1383,6 +1383,7 @@ int __node_distance(int from, int to) } return numa_latency[from][to]; } +EXPORT_SYMBOL(__node_distance); static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5f26962eff42..67ed72f31cc2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,8 @@ struct vcpu_data { #ifdef CONFIG_IRQ_REMAP +extern raw_spinlock_t irq_2_ir_lock; + extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index f39f3a06c26f..7299dcbf8e85 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -140,7 +140,7 @@ static void __init dtb_cpu_setup(void) int ret; version = GET_APIC_VERSION(apic_read(APIC_LVR)); - for_each_node_by_type(dn, "cpu") { + for_each_of_cpu_node(dn) { ret = of_property_read_u32(dn, "reg", &apic_id); if (ret < 0) { pr_warn("%pOF: missing local APIC ID\n", dn); diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index d67e30faff9c..be060dfb1cc3 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -80,28 +80,18 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) head-y := arch/xtensa/kernel/head.o core-y += arch/xtensa/kernel/ arch/xtensa/mm/ core-y += $(buildvar) $(buildplf) +core-y += arch/xtensa/boot/dts/ libs-y += arch/xtensa/lib/ $(LIBGCC) drivers-$(CONFIG_OPROFILE) += arch/xtensa/oprofile/ -ifneq ($(CONFIG_BUILTIN_DTB),"") -core-$(CONFIG_OF) += arch/xtensa/boot/dts/ -endif - boot := arch/xtensa/boot all Image zImage uImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -%.dtb: - $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@ - -dtbs: scripts - $(Q)$(MAKE) $(build)=$(boot)/dts - define archhelp @echo '* Image - Kernel ELF image with reset vector' @echo '* zImage - Compressed kernel image (arch/xtensa/boot/images/zImage.*)' @echo '* uImage - U-Boot wrapped image' - @echo ' dtbs - Build device tree blobs for enabled boards' endef diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 42285f35d313..820e8738af11 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -94,7 +94,7 @@ static void __init xtfpga_clk_setup(struct device_node *np) u32 freq; if (!base) { - pr_err("%s: invalid address\n", np->name); + pr_err("%pOFn: invalid address\n", np); return; } @@ -103,12 +103,12 @@ static void __init xtfpga_clk_setup(struct device_node *np) clk = clk_register_fixed_rate(NULL, np->name, NULL, 0, freq); if (IS_ERR(clk)) { - pr_err("%s: clk registration failed\n", np->name); + pr_err("%pOFn: clk registration failed\n", np); return; } if (of_clk_add_provider(np, of_clk_src_simple_get, clk)) { - pr_err("%s: clk provider registration failed\n", np->name); + pr_err("%pOFn: clk provider registration failed\n", np); return; } } |