From eb214f2dda31ffa989033b1e0f848ba0d3cb6188 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 8 Jan 2019 15:23:11 -0800 Subject: kasan, arm64: use ARCH_SLAB_MINALIGN instead of manual aligning Instead of changing cache->align to be aligned to KASAN_SHADOW_SCALE_SIZE in kasan_cache_create() we can reuse the ARCH_SLAB_MINALIGN macro. Link: http://lkml.kernel.org/r/52ddd881916bcc153a9924c154daacde78522227.1546540962.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Suggested-by: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Christoph Lameter Cc: Dmitry Vyukov Cc: Mark Rutland Cc: Vincenzo Frascino Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/cache.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 13dd42c3ad4e..eb43e09c1980 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -58,6 +58,12 @@ */ #define ARCH_DMA_MINALIGN (128) +#ifdef CONFIG_KASAN_SW_TAGS +#define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) +#else +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3 From d9ed41962ee202f653a5fa8d2ea0f52924abe629 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jan 2019 10:34:49 +0000 Subject: arm64: asm-prototypes: Fix fat-fingered typo in comment Some of the right letters, not necessarily in the right order: CONFIG_MODEVERIONS -> CONFIG_MODVERSIONS Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-prototypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h index 2173ad32d550..1c9a3a0c5fa5 100644 --- a/arch/arm64/include/asm/asm-prototypes.h +++ b/arch/arm64/include/asm/asm-prototypes.h @@ -2,7 +2,7 @@ #ifndef __ASM_PROTOTYPES_H #define __ASM_PROTOTYPES_H /* - * CONFIG_MODEVERIONS requires a C declaration to generate the appropriate CRC + * CONFIG_MODVERSIONS requires a C declaration to generate the appropriate CRC * for each symbol. Since commit: * * 4efca4ed05cbdfd1 ("kbuild: modversions for EXPORT_SYMBOL() for asm") -- cgit v1.2.3 From b89d82ef01b33bc50cbaa8ff05607879b40d0704 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Jan 2019 16:19:01 +0000 Subject: arm64: kpti: Avoid rewriting early page tables when KASLR is enabled A side effect of commit c55191e96caa ("arm64: mm: apply r/o permissions of VM areas to its linear alias as well") is that the linear map is created with page granularity, which means that transitioning the early page table from global to non-global mappings when enabling kpti can take a significant amount of time during boot. Given that most CPU implementations do not require kpti, this mainly impacts KASLR builds where kpti is forcefully enabled. However, in these situations we know early on that non-global mappings are required and can avoid the use of global mappings from the beginning. The only gotcha is Cavium erratum #27456, which we must detect based on the MIDR value of the boot CPU. Reviewed-by: Ard Biesheuvel Reported-by: John Garry Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 41 +++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-prot.h | 4 ++-- arch/arm64/kernel/cpu_errata.c | 2 +- arch/arm64/kernel/cpufeature.c | 9 ++++++-- arch/arm64/kernel/head.S | 1 + 5 files changed, 52 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 7689c7aa1d77..ac352accb3d9 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -16,6 +16,8 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H +#include + #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ #define USER_ASID_BIT 48 #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) @@ -44,6 +46,45 @@ static inline bool arm64_kernel_unmapped_at_el0(void) cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } +static inline bool arm64_kernel_use_ng_mappings(void) +{ + bool tx1_bug; + + /* What's a kpti? Use global mappings if we don't know. */ + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + return false; + + /* + * Note: this function is called before the CPU capabilities have + * been configured, so our early mappings will be global. If we + * later determine that kpti is required, then + * kpti_install_ng_mappings() will make them non-global. + */ + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + return arm64_kernel_unmapped_at_el0(); + + /* + * KASLR is enabled so we're going to be enabling kpti on non-broken + * CPUs regardless of their susceptibility to Meltdown. Rather + * than force everybody to go through the G -> nG dance later on, + * just put down non-global mappings from the beginning. + */ + if (!IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { + tx1_bug = false; +#ifndef MODULE + } else if (!static_branch_likely(&arm64_const_caps_ready)) { + extern const struct midr_range cavium_erratum_27456_cpus[]; + + tx1_bug = is_midr_in_range_list(read_cpuid_id(), + cavium_erratum_27456_cpus); +#endif + } else { + tx1_bug = __cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456); + } + + return !tx1_bug && kaslr_offset() > 0; +} + typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 78b942c1bea4..986e41c4c32b 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -37,8 +37,8 @@ #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) -#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0) +#define PTE_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_kernel_use_ng_mappings() ? PMD_SECT_NG : 0) #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 09ac548c9d44..9950bb0cbd52 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -553,7 +553,7 @@ static const struct midr_range arm64_repeat_tlbi_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -static const struct midr_range cavium_erratum_27456_cpus[] = { +const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4f272399de89..f6d84e2c92fe 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -983,7 +983,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, /* Useful for KASLR robustness */ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return true; + return kaslr_offset() > 0; /* Don't force KPTI for CPUs that are not vulnerable */ if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) @@ -1003,7 +1003,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) static bool kpti_applied = false; int cpu = smp_processor_id(); - if (kpti_applied) + /* + * We don't need to rewrite the page-tables if either we've done + * it already or we have KASLR enabled and therefore have not + * created any global mappings at all. + */ + if (kpti_applied || kaslr_offset() > 0) return; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c7213674cb24..15d79a8e5e5e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -475,6 +475,7 @@ ENDPROC(__primary_switched) ENTRY(kimage_vaddr) .quad _text - TEXT_OFFSET +EXPORT_SYMBOL(kimage_vaddr) /* * If we're fortunate enough to boot at EL2, ensure that the world is -- cgit v1.2.3 From 2f97967503df8e45bc256a348b6f050abd2a38ed Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 15 Jan 2019 18:49:17 +0000 Subject: arm64: kpti: Update arm64_kernel_use_ng_mappings() when forced on Since commit b89d82ef01b3 ("arm64: kpti: Avoid rewriting early page tables when KASLR is enabled"), a kernel built with CONFIG_RANDOMIZE_BASE can decide early whether to use non-global mappings by checking the kaslr_offset(). A kernel built without CONFIG_RANDOMIZE_BASE, instead checks the cpufeature static-key. This leaves a gap where CONFIG_RANDOMIZE_BASE was enabled, no kaslr seed was provided, but kpti was forced on using the cmdline option. When the decision is made late, kpti_install_ng_mappings() will re-write the page tables, but arm64_kernel_use_ng_mappings()'s value does not change as it only tests the cpufeature static-key if CONFIG_RANDOMIZE_BASE is disabled. This function influences PROT_DEFAULT via PTE_MAYBE_NG, and causes pgattr_change_is_safe() to catch nG->G transitions when the unchanged PROT_DEFAULT is used as part of PAGE_KERNEL_RO: [ 1.942255] alternatives: patching kernel code [ 1.998288] ------------[ cut here ]------------ [ 2.000693] kernel BUG at arch/arm64/mm/mmu.c:165! [ 2.019215] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 2.020257] Modules linked in: [ 2.020807] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.0.0-rc2 #51 [ 2.021917] Hardware name: linux,dummy-virt (DT) [ 2.022790] pstate: 40000005 (nZcv daif -PAN -UAO) [ 2.023742] pc : __create_pgd_mapping+0x508/0x6d0 [ 2.024671] lr : __create_pgd_mapping+0x500/0x6d0 [ 2.058059] Process swapper/0 (pid: 1, stack limit = 0x(____ptrval____)) [ 2.059369] Call trace: [ 2.059845] __create_pgd_mapping+0x508/0x6d0 [ 2.060684] update_mapping_prot+0x48/0xd0 [ 2.061477] mark_linear_text_alias_ro+0xdc/0xe4 [ 2.070502] smp_cpus_done+0x90/0x98 [ 2.071216] smp_init+0x100/0x114 [ 2.071878] kernel_init_freeable+0xd4/0x220 [ 2.072750] kernel_init+0x10/0x100 [ 2.073455] ret_from_fork+0x10/0x18 [ 2.075414] ---[ end trace 3572f3a7782292de ]--- [ 2.076389] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b If arm64_kernel_unmapped_at_el0() is true, arm64_kernel_use_ng_mappings() should also be true. Signed-off-by: James Morse CC: Ard Biesheuvel CC: John Garry CC: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index ac352accb3d9..3e8063f4f9d3 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -60,8 +60,11 @@ static inline bool arm64_kernel_use_ng_mappings(void) * later determine that kpti is required, then * kpti_install_ng_mappings() will make them non-global. */ + if (arm64_kernel_unmapped_at_el0()) + return true; + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return arm64_kernel_unmapped_at_el0(); + return false; /* * KASLR is enabled so we're going to be enabling kpti on non-broken -- cgit v1.2.3 From 7fa1e2e6afa7f4c9f46528e61de6a15d9e8dffd9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 11 Jan 2019 14:47:40 +0100 Subject: kasan, arm64: remove redundant ARCH_SLAB_MINALIGN define Defining ARCH_SLAB_MINALIGN in arch/arm64/include/asm/cache.h when KASAN is off is not needed, as it is defined in defined in include/linux/slab.h as ifndef. Signed-off-by: Andrey Konovalov Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index eb43e09c1980..926434f413fa 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -60,8 +60,6 @@ #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) -#else -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 60d8cd572f655aac6107a2330dced004ad1fe3d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Jan 2019 19:01:48 +0100 Subject: arm64/xen: fix xen-swiotlb cache flushing Xen-swiotlb hooks into the arm/arm64 arch code through a copy of the DMA DMA mapping operations stored in the struct device arch data. Switching arm64 to use the direct calls for the merged DMA direct / swiotlb code broke this scheme. Replace the indirect calls with direct-calls in xen-swiotlb as well to fix this problem. Fixes: 356da6d0cde3 ("dma-mapping: bypass indirect calls for dma-direct") Reported-by: Julien Grall Signed-off-by: Christoph Hellwig Reviewed-by: Stefano Stabellini --- arch/arm/include/asm/xen/page-coherent.h | 94 +++++++++++++++++++++++++++++ arch/arm64/include/asm/device.h | 3 - arch/arm64/include/asm/xen/page-coherent.h | 76 +++++++++++++++++++++++ arch/arm64/mm/dma-mapping.c | 4 +- drivers/xen/swiotlb-xen.c | 4 +- include/xen/arm/page-coherent.h | 97 +----------------------------- 6 files changed, 176 insertions(+), 102 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index b3ef061d8b74..2c403e7c782d 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -1 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H +#define _ASM_ARM_XEN_PAGE_COHERENT_H + +#include +#include #include + +static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dev_dma_ops) + return dev->archdata.dev_dma_ops; + return get_arch_dma_ops(NULL); +} + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<map_page(hwdev, page, offset, size, dir, attrs); + else + __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); +} + +static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long pfn = PFN_DOWN(handle); + /* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross + * multiple Xen page, it's not possible to have a mix of local and + * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a + * foreign mfn will always return false. If the page is local we can + * safely call the native dma_ops function, otherwise we call the xen + * specific function. + */ + if (pfn_valid(pfn)) { + if (xen_get_dma_ops(hwdev)->unmap_page) + xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); + } else + __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) { + if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) + xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); + } else + __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) { + if (xen_get_dma_ops(hwdev)->sync_single_for_device) + xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); + } else + __xen_dma_sync_single_for_device(hwdev, handle, size, dir); +} + +#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 3dd3d664c5c5..4658c937e173 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -20,9 +20,6 @@ struct dev_archdata { #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif -#ifdef CONFIG_XEN - const struct dma_map_ops *dev_dma_ops; -#endif }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h index b3ef061d8b74..d88e56b90b93 100644 --- a/arch/arm64/include/asm/xen/page-coherent.h +++ b/arch/arm64/include/asm/xen/page-coherent.h @@ -1 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H +#define _ASM_ARM64_XEN_PAGE_COHERENT_H + +#include +#include #include + +static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) +{ + return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs); +} + +static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) +{ + dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs); +} + +static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + + if (pfn_valid(pfn)) + dma_direct_sync_single_for_cpu(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); +} + +static inline void xen_dma_sync_single_for_device(struct device *hwdev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(handle); + if (pfn_valid(pfn)) + dma_direct_sync_single_for_device(hwdev, handle, size, dir); + else + __xen_dma_sync_single_for_device(hwdev, handle, size, dir); +} + +static inline void xen_dma_map_page(struct device *hwdev, struct page *page, + dma_addr_t dev_addr, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<archdata.dev_dma_ops = dev->dma_ops; + if (xen_initial_domain()) dev->dma_ops = xen_dma_ops; - } #endif } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 989cf872b98c..bb7888429be6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -645,7 +645,7 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#ifdef CONFIG_ARM if (xen_get_dma_ops(dev)->mmap) return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -662,7 +662,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) { -#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) +#ifdef CONFIG_ARM if (xen_get_dma_ops(dev)->get_sgtable) { #if 0 /* diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h index 59a260712a56..2ca9164a79bf 100644 --- a/include/xen/arm/page-coherent.h +++ b/include/xen/arm/page-coherent.h @@ -1,17 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H -#define _ASM_ARM_XEN_PAGE_COHERENT_H - -#include -#include -#include - -static inline const struct dma_map_ops *xen_get_dma_ops(struct device *dev) -{ - if (dev && dev->archdata.dev_dma_ops) - return dev->archdata.dev_dma_ops; - return get_arch_dma_ops(NULL); -} +#ifndef _XEN_ARM_PAGE_COHERENT_H +#define _XEN_ARM_PAGE_COHERENT_H void __xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, @@ -21,87 +10,7 @@ void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, unsigned long attrs); void __xen_dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); - void __xen_dma_sync_single_for_device(struct device *hwdev, dma_addr_t handle, size_t size, enum dma_data_direction dir); -static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) -{ - return xen_get_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs); -} - -static inline void xen_free_coherent_pages(struct device *hwdev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) -{ - xen_get_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs); -} - -static inline void xen_dma_map_page(struct device *hwdev, struct page *page, - dma_addr_t dev_addr, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long page_pfn = page_to_xen_pfn(page); - unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); - unsigned long compound_pages = - (1<map_page(hwdev, page, offset, size, dir, attrs); - else - __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs); -} - -static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long pfn = PFN_DOWN(handle); - /* - * Dom0 is mapped 1:1, while the Linux page can be spanned accross - * multiple Xen page, it's not possible to have a mix of local and - * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a - * foreign mfn will always return false. If the page is local we can - * safely call the native dma_ops function, otherwise we call the xen - * specific function. - */ - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->unmap_page) - xen_get_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); - } else - __xen_dma_unmap_page(hwdev, handle, size, dir, attrs); -} - -static inline void xen_dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_cpu) - xen_get_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir); -} - -static inline void xen_dma_sync_single_for_device(struct device *hwdev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(handle); - if (pfn_valid(pfn)) { - if (xen_get_dma_ops(hwdev)->sync_single_for_device) - xen_get_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir); - } else - __xen_dma_sync_single_for_device(hwdev, handle, size, dir); -} - -#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */ +#endif /* _XEN_ARM_PAGE_COHERENT_H */ -- cgit v1.2.3 From 358b28f09f0ab074d781df72b8a671edb1547789 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Dec 2018 11:36:07 +0000 Subject: arm/arm64: KVM: Allow a VCPU to fully reset itself The current kvm_psci_vcpu_on implementation will directly try to manipulate the state of the VCPU to reset it. However, since this is not done on the thread that runs the VCPU, we can end up in a strangely corrupted state when the source and target VCPUs are running at the same time. Fix this by factoring out all reset logic from the PSCI implementation and forwarding the required information along with a request to the target VCPU. Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 10 ++++++++++ arch/arm/kvm/reset.c | 24 ++++++++++++++++++++++++ arch/arm64/include/asm/kvm_host.h | 11 +++++++++++ arch/arm64/kvm/reset.c | 24 ++++++++++++++++++++++++ virt/kvm/arm/arm.c | 10 ++++++++++ virt/kvm/arm/psci.c | 36 ++++++++++++++++-------------------- 6 files changed, 95 insertions(+), 20 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index ca56537b61bc..50e89869178a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -48,6 +48,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -147,6 +148,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +struct vcpu_reset_state { + unsigned long pc; + unsigned long r0; + bool be; + bool reset; +}; + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; @@ -186,6 +194,8 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; + struct vcpu_reset_state reset_state; + /* Detect first run of a vcpu */ bool has_run_once; }; diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 5ed0c3ee33d6..e53327912adc 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -69,6 +70,29 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset CP15 registers */ kvm_reset_coprocs(vcpu); + /* + * Additional reset state handling that PSCI may have imposed on us. + * Must be done after all the sys_reg reset. + */ + if (READ_ONCE(vcpu->arch.reset_state.reset)) { + unsigned long target_pc = vcpu->arch.reset_state.pc; + + /* Gracefully handle Thumb2 entry point */ + if (target_pc & 1) { + target_pc &= ~1UL; + vcpu_set_thumb(vcpu); + } + + /* Propagate caller endianness */ + if (vcpu->arch.reset_state.be) + kvm_vcpu_set_be(vcpu); + + *vcpu_pc(vcpu) = target_pc; + vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); + + vcpu->arch.reset_state.reset = false; + } + /* Reset arch_timer context */ return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7732d0ba4e60..da3fc7324d68 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -48,6 +48,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) +#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -208,6 +209,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +struct vcpu_reset_state { + unsigned long pc; + unsigned long r0; + bool be; + bool reset; +}; + struct kvm_vcpu_arch { struct kvm_cpu_context ctxt; @@ -297,6 +305,9 @@ struct kvm_vcpu_arch { /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; + /* Additional reset state */ + struct vcpu_reset_state reset_state; + /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ bool sysregs_loaded_on_cpu; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f21a2a575939..f16a5f8ff2b4 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* Maximum phys_shift supported for any VM on this host */ @@ -146,6 +147,29 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset system registers */ kvm_reset_sys_regs(vcpu); + /* + * Additional reset state handling that PSCI may have imposed on us. + * Must be done after all the sys_reg reset. + */ + if (vcpu->arch.reset_state.reset) { + unsigned long target_pc = vcpu->arch.reset_state.pc; + + /* Gracefully handle Thumb2 entry point */ + if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { + target_pc &= ~1UL; + vcpu_set_thumb(vcpu); + } + + /* Propagate caller endianness */ + if (vcpu->arch.reset_state.be) + kvm_vcpu_set_be(vcpu); + + *vcpu_pc(vcpu) = target_pc; + vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0); + + vcpu->arch.reset_state.reset = false; + } + /* Reset PMU */ kvm_pmu_vcpu_reset(vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9e350fd34504..9c486fad3f9f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -626,6 +626,13 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu) /* Awaken to handle a signal, request we sleep again later. */ kvm_make_request(KVM_REQ_SLEEP, vcpu); } + + /* + * Make sure we will observe a potential reset request if we've + * observed a change to the power state. Pairs with the smp_wmb() in + * kvm_psci_vcpu_on(). + */ + smp_rmb(); } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) @@ -639,6 +646,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) vcpu_req_sleep(vcpu); + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + /* * Clear IRQ_PENDING requests that were made to guarantee * that a VCPU sees new virtual interrupts. diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 9b73d3ad918a..34d08ee63747 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -104,12 +104,10 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { + struct vcpu_reset_state *reset_state; struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; - struct swait_queue_head *wq; unsigned long cpu_id; - unsigned long context_id; - phys_addr_t target_pc; cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; if (vcpu_mode_is_32bit(source_vcpu)) @@ -130,32 +128,30 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_INVALID_PARAMS; } - target_pc = smccc_get_arg2(source_vcpu); - context_id = smccc_get_arg3(source_vcpu); + reset_state = &vcpu->arch.reset_state; - kvm_reset_vcpu(vcpu); - - /* Gracefully handle Thumb2 entry point */ - if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) { - target_pc &= ~((phys_addr_t) 1); - vcpu_set_thumb(vcpu); - } + reset_state->pc = smccc_get_arg2(source_vcpu); /* Propagate caller endianness */ - if (kvm_vcpu_is_be(source_vcpu)) - kvm_vcpu_set_be(vcpu); + reset_state->be = kvm_vcpu_is_be(source_vcpu); - *vcpu_pc(vcpu) = target_pc; /* * NOTE: We always update r0 (or x0) because for PSCI v0.1 * the general puspose registers are undefined upon CPU_ON. */ - smccc_set_retval(vcpu, context_id, 0, 0, 0); - vcpu->arch.power_off = false; - smp_mb(); /* Make sure the above is visible */ + reset_state->r0 = smccc_get_arg3(source_vcpu); + + WRITE_ONCE(reset_state->reset, true); + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); - wq = kvm_arch_vcpu_wq(vcpu); - swake_up_one(wq); + /* + * Make sure the reset request is observed if the change to + * power_state is observed. + */ + smp_wmb(); + + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); return PSCI_RET_SUCCESS; } -- cgit v1.2.3 From 8a5b403d71affa098009cc3dff1b2c45113021ad Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 15 Feb 2019 13:33:32 +0100 Subject: arm64, mm, efi: Account for GICv3 LPI tables in static memblock reserve table In the irqchip and EFI code, we have what basically amounts to a quirk to work around a peculiarity in the GICv3 architecture, which permits the system memory address of LPI tables to be programmable only once after a CPU reset. This means kexec kernels must use the same memory as the first kernel, and thus ensure that this memory has not been given out for other purposes by the time the ITS init code runs, which is not very early for secondary CPUs. On systems with many CPUs, these reservations could overflow the memblock reservation table, and this was addressed in commit: eff896288872 ("efi/arm: Defer persistent reservations until after paging_init()") However, this turns out to have made things worse, since the allocation of page tables and heap space for the resized memblock reservation table itself may overwrite the regions we are attempting to reserve, which may cause all kinds of corruption, also considering that the ITS will still be poking bits into that memory in response to incoming MSIs. So instead, let's grow the static memblock reservation table on such systems so it can accommodate these reservations at an earlier time. This will permit us to revert the above commit in a subsequent patch. [ mingo: Minor cleanups. ] Signed-off-by: Ard Biesheuvel Acked-by: Mike Rapoport Acked-by: Will Deacon Acked-by: Marc Zyngier Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20190215123333.21209-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/memory.h | 11 +++++++++++ include/linux/memblock.h | 3 --- mm/memblock.c | 11 +++++++++-- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index e1ec947e7c0c..0c656850eeea 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -332,6 +332,17 @@ static inline void *phys_to_virt(phys_addr_t x) #define virt_addr_valid(kaddr) \ (_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr)) +/* + * Given that the GIC architecture permits ITS implementations that can only be + * configured with a LPI table address once, GICv3 systems with many CPUs may + * end up reserving a lot of different regions after a kexec for their LPI + * tables (one per CPU), as we are forced to reuse the same memory after kexec + * (and thus reserve it persistently with EFI beforehand) + */ +#if defined(CONFIG_EFI) && defined(CONFIG_ARM_GIC_V3_ITS) +# define INIT_MEMBLOCK_RESERVED_REGIONS (INIT_MEMBLOCK_REGIONS + NR_CPUS + 1) +#endif + #include #endif diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 64c41cf45590..859b55b66db2 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -29,9 +29,6 @@ extern unsigned long max_pfn; */ extern unsigned long long max_possible_pfn; -#define INIT_MEMBLOCK_REGIONS 128 -#define INIT_PHYSMEM_REGIONS 4 - /** * enum memblock_flags - definition of memory region attributes * @MEMBLOCK_NONE: no special request diff --git a/mm/memblock.c b/mm/memblock.c index 022d4cbb3618..ea31045ba704 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -26,6 +26,13 @@ #include "internal.h" +#define INIT_MEMBLOCK_REGIONS 128 +#define INIT_PHYSMEM_REGIONS 4 + +#ifndef INIT_MEMBLOCK_RESERVED_REGIONS +# define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#endif + /** * DOC: memblock overview * @@ -92,7 +99,7 @@ unsigned long max_pfn; unsigned long long max_possible_pfn; static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock; #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; #endif @@ -105,7 +112,7 @@ struct memblock memblock __initdata_memblock = { .reserved.regions = memblock_reserved_init_regions, .reserved.cnt = 1, /* empty dummy entry */ - .reserved.max = INIT_MEMBLOCK_REGIONS, + .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS, .reserved.name = "reserved", #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP -- cgit v1.2.3