From 74c11e300c103af47db5b658fdcf28002421e250 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 1 Jun 2018 14:34:33 +0300 Subject: ARC: Explicitly add -mmedium-calls to CFLAGS GCC built for arc*-*-linux has "-mmedium-calls" implicitly enabled by default thus we don't see any problems during Linux kernel compilation. ----------------------------->8------------------------ arc-linux-gcc -mcpu=arc700 -Q --help=target | grep calls -mlong-calls [disabled] -mmedium-calls [enabled] ----------------------------->8------------------------ But if we try to use so-called Elf32 toolchain with GCC configured for arc*-*-elf* then we'd see the following failure: ----------------------------->8------------------------ init/do_mounts.o: In function 'init_rootfs': do_mounts.c:(.init.text+0x108): relocation truncated to fit: R_ARC_S21W_PCREL against symbol 'unregister_filesystem' defined in .text section in fs/filesystems.o arc-elf32-ld: final link failed: Symbol needs debug section which does not exist make: *** [vmlinux] Error 1 ----------------------------->8------------------------ That happens because neither "-mmedium-calls" nor "-mlong-calls" are enabled in Elf32 GCC: ----------------------------->8------------------------ arc-elf32-gcc -mcpu=arc700 -Q --help=target | grep calls -mlong-calls [disabled] -mmedium-calls [disabled] ----------------------------->8------------------------ Now to make it possible to use Elf32 toolchain for building Linux kernel we're explicitly add "-mmedium-calls" to CFLAGS. And since we add "-mmedium-calls" to the global CFLAGS there's no point in having per-file copies thus removing them. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d37f49d6a27f..6c1b20dd76ad 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -16,7 +16,7 @@ endif KBUILD_DEFCONFIG := nsim_700_defconfig -cflags-y += -fno-common -pipe -fno-builtin -D__linux__ +cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs @@ -140,16 +140,3 @@ dtbs: scripts archclean: $(Q)$(MAKE) $(clean)=$(boot) - -# Hacks to enable final link due to absence of link-time branch relexation -# and gcc choosing optimal(shorter) branches at -O3 -# -# vineetg Feb 2010: -mlong-calls switched off for overall kernel build -# However lib/decompress_inflate.o (.init.text) calls -# zlib_inflate_workspacesize (.text) causing relocation errors. -# Thus forcing all exten calls in this file to be long calls -export CFLAGS_decompress_inflate.o = -mmedium-calls -export CFLAGS_initramfs.o = -mmedium-calls -ifdef CONFIG_SMP -export CFLAGS_core.o = -mmedium-calls -endif -- cgit v1.2.3 From 97d1e3dc8d98f97b65350f8ab221e6d061705abc Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Mon, 28 May 2018 18:29:02 +0200 Subject: nds32: define __NDS32_E[BL]__ for sparse nds32 depends on the macros '__NDS32_E[BL]__' to correctly select or define endian-specific macros, structures or pieces of code. These macros are predefined by the compiler but sparse knows nothing about them and thus may pre-process files differently from what GCC would. Fix this by adding '-D__NDS32_E[BL]__' to CHECKFLAGS. Signed-off-by: Luc Van Oostenryck Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 513bb2e9baf9..031c676821ff 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -34,10 +34,12 @@ ifdef CONFIG_CPU_LITTLE_ENDIAN KBUILD_CFLAGS += $(call cc-option, -EL) KBUILD_AFLAGS += $(call cc-option, -EL) LDFLAGS += $(call cc-option, -EL) +CHECKFLAGS += -D__NDS32_EL__ else KBUILD_CFLAGS += $(call cc-option, -EB) KBUILD_AFLAGS += $(call cc-option, -EB) LDFLAGS += $(call cc-option, -EB) +CHECKFLAGS += -D__NDS32_EB__ endif boot := arch/nds32/boot -- cgit v1.2.3 From a78945c357f58665d6a5da8a69e085898e831c70 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 14 Jun 2018 15:28:52 +0100 Subject: nds32: Fix build error caused by configuration flag rename Fix build error on nds32 due to the merge of commit e3d5980568f ("lib: Rename compiler intrinsic selects to GENERIC_LIB_*") during the 4.18 merge window which renames Kconfig symbols. This had raced with commit aeaa7af744fa ("nds32: lib: To use generic lib instead of libgcc to prevent the symbol undefined issue.") merged late in the 4.17 cycle, which added selects to nds32 using the original Kconfig symbol names. When they came together in merge commit 763f96944c95 ("Merge tag 'mips_4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux") this resulted in the following build errors: nds32le-linux-ld: kernel/time/timekeeping.o: in function `timekeeping_init': timekeeping.c:(.init.text+0x140): undefined reference to `__ashldi3' nds32le-linux-ld: timekeeping.c:(.init.text+0x144): undefined reference to `__ashldi3' nds32le-linux-ld: timekeeping.c:(.init.text+0x17e): undefined reference to `__lshrdi3' nds32le-linux-ld: timekeeping.c:(.init.text+0x182): undefined reference to `__lshrdi3' nds32le-linux-ld: drivers/clocksource/mmio.o: in function `clocksource_mmio_init': mmio.c:(.init.text+0x54): undefined reference to `__lshrdi3' nds32le-linux-ld: mmio.c:(.init.text+0x58): undefined reference to `__lshrdi3' Rename all 6 selects in nds32 and adjust the ordering accordingly to be alphabetical. Fixes: 763f96944c95 ("Merge tag 'mips_4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux") Signed-off-by: Guenter Roeck [jhogan@kernel.org: Rename all 6 symbols, sort, update commit message] Signed-off-by: James Hogan Cc: Greentime Hu Cc: Vincent Chen Cc: Matt Redfearn Cc: Palmer Dabbelt Acked-by: Greentime Hu Signed-off-by: Greentime Hu --- arch/nds32/Kconfig | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 6aed974276d8..34f7222c5efe 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -12,17 +12,17 @@ config NDS32 select CLONE_BACKWARDS select COMMON_CLK select DMA_NONCOHERENT_OPS - select GENERIC_ASHLDI3 - select GENERIC_ASHRDI3 - select GENERIC_LSHRDI3 - select GENERIC_CMPDI2 - select GENERIC_MULDI3 - select GENERIC_UCMPDI2 select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS select GENERIC_IRQ_CHIP select GENERIC_IRQ_SHOW + select GENERIC_LIB_ASHLDI3 + select GENERIC_LIB_ASHRDI3 + select GENERIC_LIB_CMPDI2 + select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_MULDI3 + select GENERIC_LIB_UCMPDI2 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL -- cgit v1.2.3 From 2f24ef7413a4d91657ef04e77c27ce0b313e6c95 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 29 Nov 2017 11:21:45 +0300 Subject: ARC: Enable machine_desc->init_per_cpu for !CONFIG_SMP machine_desc->init_per_cpu() hook is supposed to be per cpu initialization and would seem to apply equally to UP and/or SMP. Infact the comment in header file seems to suggest it works for UP too, which was not the case and this patch. This enables !CONFIG_SMP build for platforms such as hsdk. Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta [vgupta: trimmeed changelog] --- arch/arc/include/asm/mach_desc.h | 2 -- arch/arc/kernel/irq.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index c28e6c347b49..871f3cb16af9 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -34,9 +34,7 @@ struct machine_desc { const char *name; const char **dt_compat; void (*init_early)(void); -#ifdef CONFIG_SMP void (*init_per_cpu)(unsigned int); -#endif void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 538b36afe89e..62b185057c04 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -31,10 +31,10 @@ void __init init_IRQ(void) /* a SMP H/w block could do IPI IRQ request here */ if (plat_smp_ops.init_per_cpu) plat_smp_ops.init_per_cpu(smp_processor_id()); +#endif if (machine_desc->init_per_cpu) machine_desc->init_per_cpu(smp_processor_id()); -#endif } /* -- cgit v1.2.3 From 76053854f7d10ca4fa492495f65798e3d48a64c4 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Thu, 21 Jun 2018 15:06:14 +0100 Subject: ARC: [plat-hsdk] Add PCIe support Add PCI support to the ARC HSDK platform allowing to use the generic PCI setup functions. Signed-off-by: Gustavo Pimentel Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 19ab3cf98f0f..556bc5ef1257 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -9,3 +9,4 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" select CLK_HSDK select RESET_HSDK + select MIGHT_HAVE_PCI -- cgit v1.2.3 From 49a6ec5b807ea4ad7ebe1f58080ebb8497cb2d2c Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 17 Jun 2018 13:53:09 +0200 Subject: ARM: dts: am437x: make edt-ft5x06 a wakeup source The touchscreen driver no longer configures the device as wakeup source by default. A "wakeup-source" property is needed. Signed-off-by: Daniel Mack Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 440351ad0b80..d4be3fd0b6f4 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -610,6 +610,8 @@ touchscreen-size-x = <480>; touchscreen-size-y = <272>; + + wakeup-source; }; tlv320aic3106: tlv320aic3106@1b { -- cgit v1.2.3 From 891f6a726cacbb87e5b06076693ffab53bd378d7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 21 Jun 2018 14:49:38 +0200 Subject: s390: Correct register corruption in critical section cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the critical section cleanup we must not mess with r1. For march=z9 or older, larl + ex (instead of exrl) are used with r1 as a temporary register. This can clobber r1 in several interrupt handlers. Fix this by using r11 as a temp register. r11 is being saved by all callers of cleanup_critical. Fixes: 6dd85fbb87 ("s390: move expoline assembler macros to a header") Cc: stable@vger.kernel.org #v4.16 Reported-by: Oliver Kurz Reported-by: Petr Tesařík Signed-off-by: Christian Borntraeger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f03402efab4b..3891805bfcdd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1265,7 +1265,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_EX %r14 +0: BR_EX %r14,%r11 .align 8 .Lcleanup_table: @@ -1301,7 +1301,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14 + BR_EX %r14,%r11 #endif .Lcleanup_system_call: -- cgit v1.2.3 From 560b423dd9af4272a1f3685c2d6b073fdc4af7c7 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 18 Jun 2018 06:52:58 +0900 Subject: openrisc: Call destructor during __pte_free_tlb This fixes an issue uncovered when a recent change to add the "page table" flag was merged. During bootup we see many errors like the following: BUG: Bad page state in process mkdir pfn:00bae page:c1ff15c0 count:0 mapcount:-1024 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 46 Comm: mkdir Tainted: G B 4.17.0-simple-smp-07461-g1d40a5ea01d5-dirty #993 Call trace: [<(ptrval)>] show_stack+0x44/0x54 [<(ptrval)>] dump_stack+0xb0/0xe8 [<(ptrval)>] bad_page+0x138/0x174 [<(ptrval)>] ? cpumask_next+0x24/0x34 [<(ptrval)>] free_pages_check_bad+0x6c/0xd0 [<(ptrval)>] free_pcppages_bulk+0x174/0x42c [<(ptrval)>] free_unref_page_commit.isra.17+0xb8/0xc8 [<(ptrval)>] free_unref_page_list+0x10c/0x190 [<(ptrval)>] ? set_reset_devices+0x0/0x2c [<(ptrval)>] release_pages+0x3a0/0x414 [<(ptrval)>] tlb_flush_mmu_free+0x5c/0x90 [<(ptrval)>] tlb_flush_mmu+0x90/0xa4 [<(ptrval)>] arch_tlb_finish_mmu+0x50/0x94 [<(ptrval)>] tlb_finish_mmu+0x30/0x64 [<(ptrval)>] exit_mmap+0x110/0x1e0 [<(ptrval)>] mmput+0x50/0xf0 [<(ptrval)>] do_exit+0x274/0xa94 [<(ptrval)>] do_group_exit+0x50/0x110 [<(ptrval)>] __wake_up_parent+0x0/0x38 [<(ptrval)>] _syscall_return+0x0/0x4 During the __pte_free_tlb path openrisc fails to call the page destructor which would clear the new bits that were introduced. To fix this we are calling the destructor. It seem openrisc was the only architecture missing this, all other architectures either call the destructor like we are doing here or use pte_free. Note: failing to call the destructor was also messing up the zone stats (and will be cause other problems if you were using SPLIT_PTE_PTLOCKS, which we are not yet). Fixes: 1d40a5ea01d53 ("mm: mark pages in use for page tables") Acked-by: Matthew Wilcox Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/pgalloc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 3e1a46615120..8999b9226512 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -98,8 +98,12 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte) __free_page(pte); } +#define __pte_free_tlb(tlb, pte, addr) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), (pte)); \ +} while (0) -#define __pte_free_tlb(tlb, pte, addr) tlb_remove_page((tlb), (pte)) #define pmd_pgtable(pmd) pmd_page(pmd) #define check_pgt_cache() do { } while (0) -- cgit v1.2.3 From 5a267832c2ec47b2dad0fdb291a96bb5b8869315 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 22 Jun 2018 10:55:45 -0700 Subject: MIPS: Call dump_stack() from show_regs() The generic nmi_cpu_backtrace() function calls show_regs() when a struct pt_regs is available, and dump_stack() otherwise. If we were to make use of the generic nmi_cpu_backtrace() with MIPS' current implementation of show_regs() this would mean that we see only register data with no accompanying stack information, in contrast with our current implementation which calls dump_stack() regardless of whether register state is available. In preparation for making use of the generic nmi_cpu_backtrace() to implement arch_trigger_cpumask_backtrace(), have our implementation of show_regs() call dump_stack() and drop the explicit dump_stack() call in arch_dump_stack() which is invoked by arch_trigger_cpumask_backtrace(). This will allow the output we produce to remain the same after a later patch switches to using nmi_cpu_backtrace(). It may mean that we produce extra stack output in other uses of show_regs(), but this: 1) Seems harmless. 2) Is good for consistency between arch_trigger_cpumask_backtrace() and other users of show_regs(). 3) Matches the behaviour of the ARM & PowerPC architectures. Marked for stable back to v4.9 as a prerequisite of the following patch "MIPS: Call dump_stack() from show_regs()". Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19596/ Cc: James Hogan Cc: Ralf Baechle Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ --- arch/mips/kernel/process.c | 4 ++-- arch/mips/kernel/traps.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8d85046adcc8..d4cfeb931382 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -663,8 +663,8 @@ static void arch_dump_stack(void *info) if (regs) show_regs(regs); - - dump_stack(); + else + dump_stack(); } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d67fa74622ee..8d505a21396e 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -351,6 +351,7 @@ static void __show_regs(const struct pt_regs *regs) void show_regs(struct pt_regs *regs) { __show_regs((struct pt_regs *)regs); + dump_stack(); } void show_registers(struct pt_regs *regs) -- cgit v1.2.3 From b63e132b6433a41cf311e8bc382d33fd2b73b505 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 22 Jun 2018 10:55:46 -0700 Subject: MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [] exit_mmap+0x76/0xea [42760.890428] [] mmput+0x80/0x11a [42760.896632] [] do_exit+0x1f4/0x80c [42760.903158] [] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan Cc: Ralf Baechle Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") --- arch/mips/kernel/process.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d4cfeb931382..9670e70139fd 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -655,28 +656,42 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ALMASK; } -static void arch_dump_stack(void *info) -{ - struct pt_regs *regs; +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd); +static struct cpumask backtrace_csd_busy; - regs = get_irq_regs(); - - if (regs) - show_regs(regs); - else - dump_stack(); +static void handle_backtrace(void *info) +{ + nmi_cpu_backtrace(get_irq_regs()); + cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy); } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +static void raise_backtrace(cpumask_t *mask) { - long this_cpu = get_cpu(); + call_single_data_t *csd; + int cpu; - if (cpumask_test_cpu(this_cpu, mask) && !exclude_self) - dump_stack(); + for_each_cpu(cpu, mask) { + /* + * If we previously sent an IPI to the target CPU & it hasn't + * cleared its bit in the busy cpumask then it didn't handle + * our previous IPI & it's not safe for us to reuse the + * call_single_data_t. + */ + if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) { + pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n", + cpu); + continue; + } - smp_call_function_many(mask, arch_dump_stack, NULL, 1); + csd = &per_cpu(backtrace_csd, cpu); + csd->func = handle_backtrace; + smp_call_function_single_async(cpu, csd); + } +} - put_cpu(); +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); } int mips_get_process_fp_mode(struct task_struct *task) -- cgit v1.2.3 From 18d405af30bf6506bf2fc49056de7691c949812e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:57 +0200 Subject: bpf, arm32: fix to use bpf_jit_binary_lock_ro api Any eBPF JIT that where its underlying arch supports ARCH_HAS_SET_MEMORY would need to use bpf_jit_binary_{un,}lock_ro() pair instead of the set_memory_{ro,rw}() pair directly as otherwise changes to the former might break. arm32's eBPF conversion missed to change it, so fix this up here. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6e8b71613039..f6a62ae44a65 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1844,7 +1844,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* there are 2 passes here */ bpf_jit_dump(prog->len, image_size, 2, ctx.target); - set_memory_ro((unsigned long)header, header->pages); + bpf_jit_binary_lock_ro(header); prog->bpf_func = (void *)ctx.target; prog->jited = 1; prog->jited_len = image_size; -- cgit v1.2.3 From f605ce5eb26ac934fb8106d75d46a2c875a2bf23 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:58 +0200 Subject: bpf, s390: fix potential memleak when later bpf_jit_prog fails If we would ever fail in the bpf_jit_prog() pass that writes the actual insns to the image after we got header via bpf_jit_binary_alloc() then we also need to make sure to free it through bpf_jit_binary_free() again when bailing out. Given we had prior bpf_jit_prog() passes to initially probe for clobbered registers, program size and to fill in addrs arrray for jump targets, this is more of a theoretical one, but at least make sure this doesn't break with future changes. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Daniel Borkmann Cc: Martin Schwidefsky Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- arch/s390/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index d2db8acb1a55..5f0234ec8038 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1286,6 +1286,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } if (bpf_jit_prog(&jit, fp)) { + bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; } -- cgit v1.2.3 From 06d793b114e9d922c03aa077ac6c5c51fdda2722 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Thu, 21 Jun 2018 19:10:00 +0100 Subject: ARM: dts: imx51-zii-rdu1: fix touchscreen pinctrl The pinctrl settings were incorrect for the touchscreen interrupt line, causing an interrupt storm. This change has been tested with both the atmel_mxt_ts and RMI4 drivers on the RDU1 units. The value 0x4 comes from the value of register IOMUXC_SW_PAD_CTL_PAD_CSI1_D8 from the old vendor kernel. Signed-off-by: Nick Dyer Fixes: ceef0396f367 ("ARM: dts: imx: add ZII RDU1 board") Cc: # 4.15+ Reviewed-by: Fabio Estevam Tested-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx51-zii-rdu1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index df9eca94d812..8a878687197b 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -770,7 +770,7 @@ pinctrl_ts: tsgrp { fsl,pins = < - MX51_PAD_CSI1_D8__GPIO3_12 0x85 + MX51_PAD_CSI1_D8__GPIO3_12 0x04 MX51_PAD_CSI1_D9__GPIO3_13 0x85 >; }; -- cgit v1.2.3 From ae15a41a641449f536578b0d9ec0e4ade130deb5 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 1 Jul 2018 14:17:36 +0900 Subject: openrisc: entry: Fix delay slot exception detection Originally in patch e6d20c55a4 ("openrisc: entry: Fix delay slot detection") I fixed delay slot detection, but only for QEMU. We missed that hardware delay slot detection using delay slot exception flag (DSX) was still broken. This was because QEMU set the DSX flag in both pre-exception supervision register (ESR) and supervision register (SR) register, but on real hardware the DSX flag is only set on the SR register during exceptions. Fix this by carrying the DSX flag into the SR register during exception. We also update the DSX flag read locations to read the value from the SR register not the pt_regs SR register which represents ESR. The ESR should never have the DSX flag set. In the process I updated/removed a few comments to match the current state. Including removing a comment saying that the DSX detection logic was inefficient and needed to be rewritten. I have tested this on QEMU with a patch ensuring it matches the hardware specification. Link: https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg00000.html Fixes: e6d20c55a4 ("openrisc: entry: Fix delay slot detection") Signed-off-by: Stafford Horne --- arch/openrisc/kernel/entry.S | 8 +------- arch/openrisc/kernel/head.S | 9 ++++++--- arch/openrisc/kernel/traps.c | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 690d55272ba6..0c826ad6e994 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -277,12 +277,6 @@ EXCEPTION_ENTRY(_data_page_fault_handler) l.addi r3,r1,0 // pt_regs /* r4 set be EXCEPTION_HANDLE */ // effective address of fault - /* - * __PHX__: TODO - * - * all this can be written much simpler. look at - * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part - */ #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX l.lwz r6,PT_PC(r3) // address of an offending insn l.lwz r6,0(r6) // instruction that caused pf @@ -314,7 +308,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) #else - l.lwz r6,PT_SR(r3) // SR + l.mfspr r6,r0,SPR_SR // SR l.andi r6,r6,SPR_SR_DSX // check for delay slot exception l.sfne r6,r0 // exception happened in delay slot l.bnf 7f diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index fb02b2a1d6f2..9fc6b60140f0 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -210,8 +210,7 @@ * r4 - EEAR exception EA * r10 - current pointing to current_thread_info struct * r12 - syscall 0, since we didn't come from syscall - * r13 - temp it actually contains new SR, not needed anymore - * r31 - handler address of the handler we'll jump to + * r30 - handler address of the handler we'll jump to * * handler has to save remaining registers to the exception * ksp frame *before* tainting them! @@ -244,6 +243,7 @@ /* r1 is KSP, r30 is __pa(KSP) */ ;\ tophys (r30,r1) ;\ l.sw PT_GPR12(r30),r12 ;\ + /* r4 use for tmp before EA */ ;\ l.mfspr r12,r0,SPR_EPCR_BASE ;\ l.sw PT_PC(r30),r12 ;\ l.mfspr r12,r0,SPR_ESR_BASE ;\ @@ -263,7 +263,10 @@ /* r12 == 1 if we come from syscall */ ;\ CLEAR_GPR(r12) ;\ /* ----- turn on MMU ----- */ ;\ - l.ori r30,r0,(EXCEPTION_SR) ;\ + /* Carry DSX into exception SR */ ;\ + l.mfspr r30,r0,SPR_SR ;\ + l.andi r30,r30,SPR_SR_DSX ;\ + l.ori r30,r30,(EXCEPTION_SR) ;\ l.mtspr r0,r30,SPR_ESR_BASE ;\ /* r30: EA address of handler */ ;\ LOAD_SYMBOL_2_GPR(r30,handler) ;\ diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index fac246e6f37a..d8981cbb852a 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -300,7 +300,7 @@ static inline int in_delay_slot(struct pt_regs *regs) return 0; } #else - return regs->sr & SPR_SR_DSX; + return mfspr(SPR_SR) & SPR_SR_DSX; #endif } -- cgit v1.2.3 From a068b94d74ddb7776ca707b6d39d1ac0d2d057e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Jun 2018 15:33:23 -0700 Subject: crypto: arm/speck - fix building in Thumb2 mode Building the kernel with CONFIG_THUMB2_KERNEL=y and CONFIG_CRYPTO_SPECK_NEON set fails with the following errors: arch/arm/crypto/speck-neon-core.S: Assembler messages: arch/arm/crypto/speck-neon-core.S:419: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:423: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:427: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:431: Error: r13 not allowed here -- `bic sp,#0xf' The problem is that the 'bic' instruction can't operate on the 'sp' register in Thumb2 mode. Fix it by using a temporary register. This isn't in the main loop, so the performance difference is negligible. This also matches what aes-neonbs-core.S does. Reported-by: Stefan Agner Fixes: ede9622162fa ("crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS") Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Reviewed-by: Stefan Agner Signed-off-by: Herbert Xu --- arch/arm/crypto/speck-neon-core.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S index 3c1e203e53b9..57caa742016e 100644 --- a/arch/arm/crypto/speck-neon-core.S +++ b/arch/arm/crypto/speck-neon-core.S @@ -272,9 +272,11 @@ * Allocate stack space to store 128 bytes worth of tweaks. For * performance, this space is aligned to a 16-byte boundary so that we * can use the load/store instructions that declare 16-byte alignment. + * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'. */ - sub sp, #128 - bic sp, #0xf + sub r12, sp, #128 + bic r12, #0xf + mov sp, r12 .if \n == 64 // Load first tweak -- cgit v1.2.3 From 221e00d1fce976d8a04ff591a0150caf84e176f8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 23 Jun 2018 12:36:22 +0200 Subject: crypto: x86 - Add missing RETs Add explicit RETs to the tail calls of AEGIS and MORUS crypto algorithms otherwise they run into INT3 padding due to ("x86/asm: Pad assembly functions with INT3 instructions") leading to spurious debug exceptions. Mike Galbraith took care of all the remaining callsites. Signed-off-by: Borislav Petkov Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-asm.S | 1 + arch/x86/crypto/aegis128l-aesni-asm.S | 1 + arch/x86/crypto/aegis256-aesni-asm.S | 1 + arch/x86/crypto/morus1280-avx2-asm.S | 1 + arch/x86/crypto/morus1280-sse2-asm.S | 1 + arch/x86/crypto/morus640-sse2-asm.S | 1 + 6 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 9254e0b6cc06..717bf0776421 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -535,6 +535,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail) movdqu STATE3, 0x40(STATEP) FRAME_END + ret ENDPROC(crypto_aegis128_aesni_enc_tail) .macro decrypt_block a s0 s1 s2 s3 s4 i diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S index 9263c344f2c7..4eda2b8db9e1 100644 --- a/arch/x86/crypto/aegis128l-aesni-asm.S +++ b/arch/x86/crypto/aegis128l-aesni-asm.S @@ -645,6 +645,7 @@ ENTRY(crypto_aegis128l_aesni_enc_tail) state_store0 FRAME_END + ret ENDPROC(crypto_aegis128l_aesni_enc_tail) /* diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S index 1d977d515bf9..32aae8397268 100644 --- a/arch/x86/crypto/aegis256-aesni-asm.S +++ b/arch/x86/crypto/aegis256-aesni-asm.S @@ -543,6 +543,7 @@ ENTRY(crypto_aegis256_aesni_enc_tail) state_store0 FRAME_END + ret ENDPROC(crypto_aegis256_aesni_enc_tail) /* diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S index 37d422e77931..07653d4582a6 100644 --- a/arch/x86/crypto/morus1280-avx2-asm.S +++ b/arch/x86/crypto/morus1280-avx2-asm.S @@ -453,6 +453,7 @@ ENTRY(crypto_morus1280_avx2_enc_tail) vmovdqu STATE4, (4 * 32)(%rdi) FRAME_END + ret ENDPROC(crypto_morus1280_avx2_enc_tail) /* diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S index 1fe637c7be9d..bd1aa1b60869 100644 --- a/arch/x86/crypto/morus1280-sse2-asm.S +++ b/arch/x86/crypto/morus1280-sse2-asm.S @@ -652,6 +652,7 @@ ENTRY(crypto_morus1280_sse2_enc_tail) movdqu STATE4_HI, (9 * 16)(%rdi) FRAME_END + ret ENDPROC(crypto_morus1280_sse2_enc_tail) /* diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S index 71c72a0a0862..efa02816d921 100644 --- a/arch/x86/crypto/morus640-sse2-asm.S +++ b/arch/x86/crypto/morus640-sse2-asm.S @@ -437,6 +437,7 @@ ENTRY(crypto_morus640_sse2_enc_tail) movdqu STATE4, (4 * 16)(%rdi) FRAME_END + ret ENDPROC(crypto_morus640_sse2_enc_tail) /* -- cgit v1.2.3 From ecd60532e060e45c63c57ecf1c8549b1d656d34d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 18 Jun 2018 15:34:14 +1000 Subject: m68k: fix "bad page state" oops on ColdFire boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Booting a ColdFire m68k core with MMU enabled causes a "bad page state" oops since commit 1d40a5ea01d5 ("mm: mark pages in use for page tables"): BUG: Bad page state in process sh pfn:01ce2 page:004fefc8 count:0 mapcount:-1024 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000 raw: 039c4000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 22 Comm: sh Not tainted 4.17.0-07461-g1d40a5ea01d5 #13 Fix by calling pgtable_page_dtor() in our __pte_free_tlb() code path, so that the PG_table flag is cleared before we free the pte page. Note that I had to change the type of pte_free() to be static from extern. Otherwise you get a lot of warnings like this: ./arch/m68k/include/asm/mcf_pgalloc.h:80:2: warning: ‘pgtable_page_dtor’ is static but used in inline function ‘pte_free’ which is not static pgtable_page_dtor(page); ^ And making it static is consistent with our use of this in the other m68k pgalloc definitions of pte_free(). Signed-off-by: Greg Ungerer CC: Matthew Wilcox Reviewed-by: Geert Uytterhoeven --- arch/m68k/include/asm/mcf_pgalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index 8b707c249026..12fe700632f4 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -44,6 +44,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, unsigned long address) { + pgtable_page_dtor(page); __free_page(page); } @@ -74,8 +75,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return page; } -extern inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { + pgtable_page_dtor(page); __free_page(page); } -- cgit v1.2.3 From dfa758638fd2d1184760deb2693abf76e982c53a Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 29 Jun 2018 19:54:01 +0200 Subject: s390/mm: fix refcount usage for 4K pgste s390 no longer uses the _mapcount field in struct page to identify the page table format being used. While the code was diligent in handling the different mappings, it neglected to turn "off" the map bits when alloc_pgste was being used. This resulted in bits remaining "on" in the _refcount field, and thus an artifically huge "in use" count that prevents the pages from actually being released by __free_page. There's opportunity for improvement in the "1 vs 3" vs "1U vs 3U" vs "0x1 vs 0x11" etc. variations for all these calls, I am just keeping things simple compared to neighboring code. Fixes: 620b4e903179 ("s390: use _refcount for pgtables") Reported-by: Halil Pasic Bisected-by: Vasily Gorbik Signed-off-by: Eric Farman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 84bd6329a88d..e3bd5627afef 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -252,6 +252,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) spin_unlock_bh(&mm->context.lock); if (mask != 0) return; + } else { + atomic_xor_bits(&page->_refcount, 3U << 24); } pgtable_page_dtor(page); @@ -304,6 +306,8 @@ static void __tlb_remove_table(void *_table) break; /* fallthrough */ case 3: /* 4K page table with pgstes */ + if (mask & 3) + atomic_xor_bits(&page->_refcount, 3 << 24); pgtable_page_dtor(page); __free_page(page); break; -- cgit v1.2.3 From 07eaa43e66f505980d00e0f5fe697f3da7c6a730 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 29 May 2018 12:00:54 +0300 Subject: ARM: dts: dra7: Disable metastability workaround for USB2 Disable the metastability workaround for USB2. The original patch disabled the workaround on the wrong USB port. Fixes: b8c9c6fa2002 ("ARM: dts: dra7: Disable USB metastability workaround for USB2") Cc: [4.16+] Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 9dcd14edc202..e03495a799ce 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1580,7 +1580,6 @@ dr_mode = "otg"; snps,dis_u3_susphy_quirk; snps,dis_u2_susphy_quirk; - snps,dis_metastability_quirk; }; }; @@ -1608,6 +1607,7 @@ dr_mode = "otg"; snps,dis_u3_susphy_quirk; snps,dis_u2_susphy_quirk; + snps,dis_metastability_quirk; }; }; -- cgit v1.2.3 From f706abf188a82c9d961ed267a18ff5cb5e9aace9 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 28 Jun 2018 18:03:25 +0800 Subject: nds32: To implement these icache invalidation APIs since nds32 cores don't snoop data cache. This issue is found by Guo Ren. Based on the Documentation/core-api/cachetlb.rst and it says: "Any necessary cache flushing or other coherency operations that need to occur should happen here. If the processor's instruction cache does not snoop cpu stores, it is very likely that you will need to flush the instruction cache for copy_to_user_page()." "If the icache does not snoop stores then this routine(flush_icache_range) will need to flush it." Signed-off-by: Guo Ren Signed-off-by: Greentime Hu --- arch/nds32/include/asm/cacheflush.h | 9 +++++-- arch/nds32/mm/cacheflush.c | 53 ++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 10b48f0d8e85..8b26198d51bb 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -8,6 +8,8 @@ #define PG_dcache_dirty PG_arch_1 +void flush_icache_range(unsigned long start, unsigned long end); +void flush_icache_page(struct vm_area_struct *vma, struct page *page); #ifdef CONFIG_CPU_CACHE_ALIASING void flush_cache_mm(struct mm_struct *mm); void flush_cache_dup_mm(struct mm_struct *mm); @@ -34,13 +36,16 @@ void flush_anon_page(struct vm_area_struct *vma, void flush_kernel_dcache_page(struct page *page); void flush_kernel_vmap_range(void *addr, int size); void invalidate_kernel_vmap_range(void *addr, int size); -void flush_icache_range(unsigned long start, unsigned long end); -void flush_icache_page(struct vm_area_struct *vma, struct page *page); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages) #else #include +#undef flush_icache_range +#undef flush_icache_page +#undef flush_icache_user_range +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len); #endif #endif /* __NDS32_CACHEFLUSH_H__ */ diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index ce8fd34497bf..7fcaa4e6be78 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -13,6 +13,38 @@ extern struct cache_info L1_cache_info[2]; +void flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long line_size, flags; + line_size = L1_cache_info[DCACHE].line_size; + start = start & ~(line_size - 1); + end = (end + line_size - 1) & ~(line_size - 1); + local_irq_save(flags); + cpu_cache_wbinval_range(start, end, 1); + local_irq_restore(flags); +} +EXPORT_SYMBOL(flush_icache_range); + +void flush_icache_page(struct vm_area_struct *vma, struct page *page) +{ + unsigned long flags; + unsigned long kaddr; + local_irq_save(flags); + kaddr = (unsigned long)kmap_atomic(page); + cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC); + kunmap_atomic((void *)kaddr); + local_irq_restore(flags); +} +EXPORT_SYMBOL(flush_icache_page); + +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long kaddr; + kaddr = (unsigned long)kmap_atomic(page) + (addr & ~PAGE_MASK); + flush_icache_range(kaddr, kaddr + len); + kunmap_atomic((void *)kaddr); +} #ifndef CONFIG_CPU_CACHE_ALIASING void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t * pte) @@ -318,27 +350,6 @@ void invalidate_kernel_vmap_range(void *addr, int size) } EXPORT_SYMBOL(invalidate_kernel_vmap_range); -void flush_icache_range(unsigned long start, unsigned long end) -{ - unsigned long line_size, flags; - line_size = L1_cache_info[DCACHE].line_size; - start = start & ~(line_size - 1); - end = (end + line_size - 1) & ~(line_size - 1); - local_irq_save(flags); - cpu_cache_wbinval_range(start, end, 1); - local_irq_restore(flags); -} -EXPORT_SYMBOL(flush_icache_range); - -void flush_icache_page(struct vm_area_struct *vma, struct page *page) -{ - unsigned long flags; - local_irq_save(flags); - cpu_cache_wbinval_page((unsigned long)page_address(page), - vma->vm_flags & VM_EXEC); - local_irq_restore(flags); -} - void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t * pte) { -- cgit v1.2.3 From 6897e6ecb3167598cb45e1a1424dd4d5e3778837 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 28 Jun 2018 18:29:21 +0800 Subject: nds32: Fix the dts pointer is not passed correctly issue. We found that the original implementation will only use the built-in dtb pointer instead of the pointer pass from bootloader. This bug is fixed by this patch. Signed-off-by: Greentime Hu --- arch/nds32/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 2f5b2ccebe47..63a1a5ef5219 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -278,7 +278,8 @@ static void __init setup_memory(void) void __init setup_arch(char **cmdline_p) { - early_init_devtree( __dtb_start); + early_init_devtree(__atags_pointer ? \ + phys_to_virt(__atags_pointer) : __dtb_start); setup_cpuinfo(); -- cgit v1.2.3 From bc9cb86e6514bed3d317208556d3cd48acff40e0 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Tue, 3 Jul 2018 14:07:11 +0800 Subject: nds32: To simplify the implementation of update_mmu_cache() The checking code is done in kmap_atomic() so that we don't need to check it in update_mmu_cache() again. There is no need to implement it for cache aliasing or cache non-aliasing versions. We can just implement one version for both. Signed-off-by: Greentime Hu --- arch/nds32/mm/cacheflush.c | 47 ++++++++-------------------------------------- 1 file changed, 8 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 7fcaa4e6be78..254703653b6f 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -45,7 +45,7 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, flush_icache_range(kaddr, kaddr + len); kunmap_atomic((void *)kaddr); } -#ifndef CONFIG_CPU_CACHE_ALIASING + void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t * pte) { @@ -67,19 +67,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, if ((test_and_clear_bit(PG_dcache_dirty, &page->flags)) || (vma->vm_flags & VM_EXEC)) { - - if (!PageHighMem(page)) { - cpu_cache_wbinval_page((unsigned long) - page_address(page), - vma->vm_flags & VM_EXEC); - } else { - unsigned long kaddr = (unsigned long)kmap_atomic(page); - cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC); - kunmap_atomic((void *)kaddr); - } + unsigned long kaddr; + local_irq_save(flags); + kaddr = (unsigned long)kmap_atomic(page); + cpu_cache_wbinval_page(kaddr, vma->vm_flags & VM_EXEC); + kunmap_atomic((void *)kaddr); + local_irq_restore(flags); } } -#else +#ifdef CONFIG_CPU_CACHE_ALIASING extern pte_t va_present(struct mm_struct *mm, unsigned long addr); static inline unsigned long aliasing(unsigned long addr, unsigned long page) @@ -349,31 +345,4 @@ void invalidate_kernel_vmap_range(void *addr, int size) local_irq_restore(flags); } EXPORT_SYMBOL(invalidate_kernel_vmap_range); - -void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, - pte_t * pte) -{ - struct page *page; - unsigned long flags; - unsigned long pfn = pte_pfn(*pte); - - if (!pfn_valid(pfn)) - return; - - if (vma->vm_mm == current->active_mm) { - local_irq_save(flags); - __nds32__mtsr_dsb(addr, NDS32_SR_TLB_VPN); - __nds32__tlbop_rwr(*pte); - __nds32__isb(); - local_irq_restore(flags); - } - - page = pfn_to_page(pfn); - if (test_and_clear_bit(PG_dcache_dirty, &page->flags) || - (vma->vm_flags & VM_EXEC)) { - local_irq_save(flags); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); - local_irq_restore(flags); - } -} #endif -- cgit v1.2.3 From 5d1c115241e4aa0034ddf44693c6bd2e89e10831 Mon Sep 17 00:00:00 2001 From: Robert Nelson Date: Mon, 2 Jul 2018 15:21:43 -0500 Subject: ARM: dts: am335x-bone-common: Fix mmc0 Write Protect Mainline Commit b74c2b21e1551018af53ee6c1efc051dfce2d788 added the pinmux settings for mmc1, however this pin (0x9a0) is routed to P9_42 on the cape header. Thus any BeagleBone cape that utilizes P9_42 triggers mmc0's Write Protect. Fixes: b74c2b21e155 ("ARM: dts: am33xx: Add pinmux data for mmc1 in am335x-evm, evmsk and beaglebone") Signed-off-by: Robert Nelson CC: Faiz Abbas CC: Tony Lindgren CC: Jason Kridner CC: Drew Fustini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-bone-common.dtsi | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index f9e8667f5886..73b514dddf65 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -168,7 +168,6 @@ AM33XX_IOPAD(0x8f0, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat3.mmc0_dat3 */ AM33XX_IOPAD(0x904, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_cmd.mmc0_cmd */ AM33XX_IOPAD(0x900, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_clk.mmc0_clk */ - AM33XX_IOPAD(0x9a0, PIN_INPUT | MUX_MODE4) /* mcasp0_aclkr.mmc0_sdwp */ >; }; -- cgit v1.2.3 From 4fb5f58e8d191f7c81637ad81284e4848afb4244 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 2 Jul 2018 23:49:54 -0700 Subject: x86/mm/32: Initialize the CR4 shadow before __flush_tlb_all() On 32-bit kernels, __flush_tlb_all() may have read the CR4 shadow before the initialization of CR4 shadow in cpu_init(). Fix it by adding an explicit cr4_init_shadow() call into start_secondary() which is the first function called on non-boot SMP CPUs - ahead of the __flush_tlb_all() call. ( This is somewhat of a layering violation, but start_secondary() does CR4 bootstrap in the PCID case anyway. ) Signed-off-by: Zhenzhong Duan Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/b07b6ae9-4b57-4b40-b9bc-50c2c67f1d91@default Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2f7d1d2a5c3..db9656e13ea0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused) #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); -- cgit v1.2.3 From 0144eb204cdcdf09a76794b4a294291388e739bc Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Mon, 25 Jun 2018 07:41:33 -0500 Subject: ARM: dts: omap3: Fix am3517 mdio and emac clock references A previous patch removed OMAP clock aliases that were perceived to be unnecessary. Unfortunately, it broke the ethernet on the am3517-evm. This patch enables the MDIO clock and EMAC clock. Fixes: 0ed266d7ae5e ("clk: ti: omap3: cleanup unnecessary clock aliases") Cc: stable@vger.kernel.org #4.16+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517.dtsi | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index ca294914bbb1..4b6062b631b1 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -39,6 +39,8 @@ ti,davinci-ctrl-ram-size = <0x2000>; ti,davinci-rmii-en = /bits/ 8 <1>; local-mac-address = [ 00 00 00 00 00 00 ]; + clocks = <&emac_ick>; + clock-names = "ick"; }; davinci_mdio: ethernet@5c030000 { @@ -49,6 +51,8 @@ bus_freq = <1000000>; #address-cells = <1>; #size-cells = <0>; + clocks = <&emac_fck>; + clock-names = "fck"; }; uart4: serial@4809e000 { -- cgit v1.2.3 From 845d382bb15c6e7dc5026c0ff919c5b13fc7e11b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:35:53 -0500 Subject: x86/bugs: Update when to check for the LS_CFG SSBD mitigation If either the X86_FEATURE_AMD_SSBD or X86_FEATURE_VIRT_SSBD features are present, then there is no need to perform the check for the LS_CFG SSBD mitigation support. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180702213553.29202.21089.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 082d7875cef8..38915fbfae73 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) nodes_per_socket = ((value >> 3) & 7) + 1; } - if (c->x86 >= 0x15 && c->x86 <= 0x17) { + if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && + !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && + c->x86 >= 0x15 && c->x86 <= 0x17) { unsigned int bit; switch (c->x86) { -- cgit v1.2.3 From 612bc3b3d4be749f73a513a17d9b3ee1330d3487 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:36:02 -0500 Subject: x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the SSBD mitigation support should use the SPEC_CTRL MSR. Other features could have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD mitigation option is in place. Update the SSBD support to check for the actual SSBD features that will use the SPEC_CTRL MSR. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage") Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 404df26b7de8..5c0ea39311fe 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -155,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); if (hostval != guestval) { @@ -533,9 +534,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !static_cpu_has(X86_FEATURE_AMD_SSBD)) { x86_amd_ssb_disable(); - else { + } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -- cgit v1.2.3 From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Jun 2018 09:23:23 -0700 Subject: x86/asm: Add _ASM_ARG* constants for argument registers to i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..990770f9e76b 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". -- cgit v1.2.3 From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:24 -0700 Subject: x86/paravirt: Make native_save_fl() extern inline native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irqflags.h | 2 +- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/irqflags.S | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/irqflags.S (limited to 'arch') diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 89f08955fff7..c4fc17220df9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 02d6f5cf4e70..8824d01c0c35 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S new file mode 100644 index 000000000000..ddeeaac8adda --- /dev/null +++ b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) -- cgit v1.2.3 From 157bcc06094c3c5800d3f4676527047b79b618e7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 25 Jun 2018 09:34:03 -0300 Subject: ARM: imx_v6_v7_defconfig: Select ULPI support Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like imx51-babbge. This fixes a kernel hang in 4.18-rc1 on i.mx51-babbage, caused by commit 03e6275ae381 ("usb: chipidea: Fix ULPI on imx51"). Suggested-by: Andrey Smirnov Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v6_v7_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index f70507ab91ee..200ebda47e0c 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -302,6 +302,7 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_FTDI_SIO=m @@ -338,6 +339,7 @@ CONFIG_USB_GADGETFS=m CONFIG_USB_FUNCTIONFS=m CONFIG_USB_MASS_STORAGE=m CONFIG_USB_G_SERIAL=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -- cgit v1.2.3 From 2ceb2780b790b74bc408a949f6aedbad8afa693e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 26 Jun 2018 08:37:09 -0300 Subject: ARM: imx_v4_v5_defconfig: Select ULPI support Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like that use ULPI interface. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v4_v5_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index 054591dc9a00..4cd2f4a2bff4 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -141,9 +141,11 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_NOP_USB_XCEIV=y CONFIG_USB_GADGET=y CONFIG_USB_ETH=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y -- cgit v1.2.3 From 5e4e290d3751607726a62f0b49e11261a0a9345e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Jul 2018 17:02:49 -0700 Subject: ARM: disable KCOV for trusted foundations code The ARM trusted foundations code is currently broken in linux-next when CONFIG_KCOV_INSTRUMENT_ALL is set: /tmp/ccHdQsCI.s: Assembler messages: /tmp/ccHdQsCI.s:37: Error: .err encountered /tmp/ccHdQsCI.s:38: Error: .err encountered /tmp/ccHdQsCI.s:39: Error: .err encountered scripts/Makefile.build:311: recipe for target 'arch/arm/firmware/trusted_foundations.o' failed I could not find a function attribute that lets me disable -fsanitize-coverage=trace-pc for just one function, so this turns it off for the entire file instead. Link: http://lkml.kernel.org/r/20180529103636.1535457-1-arnd@arndb.de Fixes: 758517202bd2e4 ("arm: port KCOV to arm") Signed-off-by: Arnd Bergmann Acked-by: Olof Johansson Tested-by: Olof Johansson Cc: Dmitry Vyukov Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/firmware/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/firmware/Makefile b/arch/arm/firmware/Makefile index a71f16536b6c..6e41336b0bc4 100644 --- a/arch/arm/firmware/Makefile +++ b/arch/arm/firmware/Makefile @@ -1 +1,4 @@ obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o + +# tf_generic_smc() fails to build with -fsanitize-coverage=trace-pc +KCOV_INSTRUMENT := n -- cgit v1.2.3 From 8bf935501a7ef1b2ec3b51c804d657d5895f221a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jun 2018 10:53:57 +0200 Subject: s390: wire up io_pgetevents system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8b210ead7956..4baefed5fefb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -389,3 +389,4 @@ 379 common statx sys_statx compat_sys_statx 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi 381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents -- cgit v1.2.3 From 9d6d99e3ac8ccfd0945edb3c83cd912838775056 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jun 2018 10:54:15 +0200 Subject: s390: wire up rseq system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/entry.S | 4 ++++ arch/s390/kernel/signal.c | 3 ++- arch/s390/kernel/syscalls/syscall.tbl | 1 + 5 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index baed39772c84..e44bb2b2873e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -160,6 +160,7 @@ config S390 select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select MODULES_USE_ELF_RELA diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 607c5e9fba3d..2ce28bf0c5ec 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -183,3 +183,4 @@ COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) +COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3891805bfcdd..150130c897c3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -357,6 +357,10 @@ ENTRY(system_call) stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: +#ifdef CONFIG_DEBUG_RSEQ + lgr %r2,%r11 + brasl %r14,rseq_syscall +#endif LOCKDEP_SYS_EXIT .Lsysc_tif: TSTMSK __PT_FLAGS(%r11),_PIF_WORK diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 2d2960ab3e10..22f08245aa5d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -498,7 +498,7 @@ void do_signal(struct pt_regs *regs) } /* No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); - + rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); else @@ -537,4 +537,5 @@ void do_notify_resume(struct pt_regs *regs) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 4baefed5fefb..022fc099b628 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -390,3 +390,4 @@ 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi 381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load 382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq compat_sys_rseq -- cgit v1.2.3 From 38fc4248677552ce35efc09902fdcb06b61d7ef9 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Mon, 2 Jul 2018 11:16:59 +0200 Subject: arm64: Use aarch64elf and aarch64elfb emulation mode variants The aarch64linux and aarch64linuxb emulation modes are not supported by bare-metal toolchains and Linux using them forbids building the kernel with these toolchains. Since there is apparently no reason to target these emulation modes, the more generic elf modes are used instead, allowing to build on bare-metal toolchains as well as the already-supported ones. Fixes: 3d6a7b99e3fa ("arm64: ensure the kernel is compiled for LP64") Cc: stable@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Paul Kocialkowski Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 45272266dafb..27389adf511a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -61,14 +61,14 @@ KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB -LDFLAGS += -maarch64linuxb +LDFLAGS += -maarch64elfb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL -LDFLAGS += -maarch64linux +LDFLAGS += -maarch64elf UTS_MACHINE := aarch64 endif -- cgit v1.2.3 From d5fad48cfb4b183d87960904c177eeeb9412b929 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:37 +0800 Subject: RISC-V: Add conditional macro for zone of DMA32 The DMA32 is for 64-bit usage. Signed-off-by: Zong Li Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index c77df8142be2..58a522f9bcc3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -28,7 +28,9 @@ static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; +#ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn)); +#endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); -- cgit v1.2.3 From 8f79125d285d2d71ed110e875754942256efa51d Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:38 +0800 Subject: RISC-V: Select GENERIC_UCMPDI2 on RV32I On 32-bit, it need to use __ucmpdi2, otherwise, it can't find the __ucmpdi2 symbol. Signed-off-by: Zong Li Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f12680c9b947..4764fdeb4f1f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -107,6 +107,7 @@ config ARCH_RV32I select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_UCMPDI2 config ARCH_RV64I bool "RV64I" -- cgit v1.2.3 From c480d8911fda96a0f37634bd4dc4e2c8a87c38da Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:39 +0800 Subject: RISC-V: Add definiion of extract symbol's index and type for 32-bit Use generic marco to get the index and type of symbol. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/elf.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index 5cae4c30cd8e..1e0dfc36aab9 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -21,8 +21,13 @@ typedef struct user_regs_struct elf_gregset_t; typedef union __riscv_fp_state elf_fpregset_t; -#define ELF_RISCV_R_SYM(r_info) ((r_info) >> 32) -#define ELF_RISCV_R_TYPE(r_info) ((r_info) & 0xffffffff) +#if __riscv_xlen == 64 +#define ELF_RISCV_R_SYM(r_info) ELF64_R_SYM(r_info) +#define ELF_RISCV_R_TYPE(r_info) ELF64_R_TYPE(r_info) +#else +#define ELF_RISCV_R_SYM(r_info) ELF32_R_SYM(r_info) +#define ELF_RISCV_R_TYPE(r_info) ELF32_R_TYPE(r_info) +#endif /* * RISC-V relocation types -- cgit v1.2.3 From 7df85002178e708aa749c63020fd333d9f085ced Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:40 +0800 Subject: RISC-V: Change variable type for 32-bit compatible Signed-off-by: Zong Li Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1d5e9b934b8c..e8c6aaa2aab4 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -37,7 +37,7 @@ static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) static int apply_r_riscv_branch_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u32 imm12 = (offset & 0x1000) << (31 - 12); u32 imm11 = (offset & 0x800) >> (11 - 7); u32 imm10_5 = (offset & 0x7e0) << (30 - 10); @@ -50,7 +50,7 @@ static int apply_r_riscv_branch_rela(struct module *me, u32 *location, static int apply_r_riscv_jal_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u32 imm20 = (offset & 0x100000) << (31 - 20); u32 imm19_12 = (offset & 0xff000); u32 imm11 = (offset & 0x800) << (20 - 11); @@ -63,7 +63,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location, static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u16 imm8 = (offset & 0x100) << (12 - 8); u16 imm7_6 = (offset & 0xc0) >> (6 - 5); u16 imm5 = (offset & 0x20) >> (5 - 2); @@ -78,7 +78,7 @@ static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u16 imm11 = (offset & 0x800) << (12 - 11); u16 imm10 = (offset & 0x400) >> (10 - 8); u16 imm9_8 = (offset & 0x300) << (12 - 11); @@ -96,7 +96,7 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; if (offset != (s32)offset) { @@ -178,7 +178,7 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; /* Always emit the got entry */ @@ -200,7 +200,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 fill_v = offset; u32 hi20, lo12; @@ -227,7 +227,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 fill_v = offset; u32 hi20, lo12; @@ -347,7 +347,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int j; for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) { - u64 hi20_loc = + unsigned long hi20_loc = sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[j].r_offset; u32 hi20_type = ELF_RISCV_R_TYPE(rel[j].r_info); @@ -360,12 +360,12 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Sym *hi20_sym = (Elf_Sym *)sechdrs[symindex].sh_addr + ELF_RISCV_R_SYM(rel[j].r_info); - u64 hi20_sym_val = + unsigned long hi20_sym_val = hi20_sym->st_value + rel[j].r_addend; /* Calculate lo12 */ - u64 offset = hi20_sym_val - hi20_loc; + size_t offset = hi20_sym_val - hi20_loc; if (IS_ENABLED(CONFIG_MODULE_SECTIONS) && hi20_type == R_RISCV_GOT_HI20) { offset = module_emit_got_entry( -- cgit v1.2.3 From 781c8fe2da3d2c7c95cd7ffddbab63b80a79da4d Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 12 Jun 2018 19:26:36 +0200 Subject: RISC-V: fix R_RISCV_ADD32/R_RISCV_SUB32 relocations The R_RISCV_ADD32/R_RISCV_SUB32 relocations should add/subtract the address of the symbol (without overflow check), not its contents. Signed-off-by: Andreas Schwab Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1d5e9b934b8c..6bb48315c973 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -263,14 +263,14 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, static int apply_r_riscv_add32_rela(struct module *me, u32 *location, Elf_Addr v) { - *(u32 *)location += (*(u32 *)v); + *(u32 *)location += (u32)v; return 0; } static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, Elf_Addr v) { - *(u32 *)location -= (*(u32 *)v); + *(u32 *)location -= (u32)v; return 0; } -- cgit v1.2.3 From f67f10b8a6c96ab6c1d6946d269e2ca5f9998cc2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 19 Jun 2018 15:41:34 -0600 Subject: riscv: remove unnecessary of_platform_populate call The DT core will call of_platform_default_populate, so it is not necessary for arch specific code to call it unless there are custom match entries, auxdata or parent device. Neither of those apply here, so remove the call. Cc: Palmer Dabbelt Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Signed-off-by: Rob Herring Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index ee44a48faf79..f0d2070866d4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -220,8 +220,3 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); } -static int __init riscv_device_init(void) -{ - return of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); -} -subsys_initcall_sync(riscv_device_init); -- cgit v1.2.3 From 8606544890d7dc4f7a740963f70dc1e1d54f8e30 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 22 Jun 2018 15:46:28 -0700 Subject: RISC-V: Don't include irq-riscv-intc.h This file has never existed in the upstream kernel, but it's guarded by an #ifdef that's also never existed in the upstream kernel. As a part of our interrupt controller refactoring this header is no longer necessary, but this reference managed to sneak in anyway. Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/irq.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index b74cbfbce2d0..7bcdaed15703 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -16,10 +16,6 @@ #include #include -#ifdef CONFIG_RISCV_INTC -#include -#endif - void __init init_IRQ(void) { irqchip_init(); -- cgit v1.2.3 From 1db9b80980d26fe95c942e0bb8bde2ca715029ad Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Mon, 11 Jun 2018 14:48:22 -0700 Subject: RISC-V: Fix PTRACE_SETREGSET bug. In riscv_gpr_set, pass regs instead of ®s to user_regset_copyin to fix gdb segfault. Signed-off-by: Jim Wilson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index ba3e80712797..9f82a7e34c64 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -50,7 +50,7 @@ static int riscv_gpr_set(struct task_struct *target, struct pt_regs *regs; regs = task_pt_regs(target); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®s, 0, -1); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); return ret; } -- cgit v1.2.3 From 2893af07e507e993ad71ca6d66a7b02be741571c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Jul 2018 10:22:00 +0900 Subject: arm64: add endianness option to LDFLAGS instead of LD With the recent syntax extension, Kconfig is now able to evaluate the compiler / toolchain capability. However, accumulating flags to 'LD' is not compatible with the way it works; 'LD' must be passed to Kconfig to call $(ld-option,...) from Kconfig files. If you tweak 'LD' in arch Makefile depending on CONFIG_CPU_BIG_ENDIAN, this would end up with circular dependency between Makefile and Kconfig. Acked-by: Will Deacon Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 27389adf511a..48158c550110 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,15 +60,13 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB -LD += -EB -LDFLAGS += -maarch64elfb +LDFLAGS += -EB -maarch64elfb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL -LD += -EL -LDFLAGS += -maarch64elf +LDFLAGS += -EL -maarch64elf UTS_MACHINE := aarch64 endif -- cgit v1.2.3 From 987553894f0ce8c1d83578136603a89009f42d35 Mon Sep 17 00:00:00 2001 From: Greentime Hu Date: Thu, 5 Jul 2018 16:13:47 +0800 Subject: nds32: fix build error "relocation truncated to fit: R_NDS32_25_PCREL_RELA" when make allyesconfig It will cause a linking error because the jump assembly code were using j, however it may be not enough to jump to the destination. We have to change it with pseudo instruction b. In that way, assembler will generate a set of safe assembly codes to make sure the destination is able to jump. Toolchain: https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/x86_64/8.1.0/x86_64-gcc-8.1.0-nolibc-nds32le-linux.tar.gz Command: PATH=/NOBACKUP/atcsqa06/greentime/os/toolchain-kernel.org/gcc-8.1.0-nolibc/nds32le-linux/bin:$PATH ARCH=nds32 CROSS_COMPILE=nds32le-linux- make allyesconfig PATH=/NOBACKUP/atcsqa06/greentime/os/toolchain-kernel.org/gcc-8.1.0-nolibc/nds32le-linux/bin:$PATH ARCH=nds32 CROSS_COMPILE=nds32le-linux- make -j8 MODPOST vmlinux.o WARNING: EXPORT symbol "copy_page" [vmlinux] version generation failed, symbol will not be versioned. WARNING: EXPORT symbol "clear_page" [vmlinux] version generation failed, symbol will not be versioned. nds32le-linux-ld: kernel/futex.o:(.fixup+0x4): relocation truncated to fit: R_NDS32_25_PCREL_RELA against `.text' nds32le-linux-ld: kernel/futex.o:(.fixup+0xaa): relocation truncated to fit: R_NDS32_25_PCREL_RELA against `.text' nds32le-linux-ld: kernel/futex.o:(.fixup+0xb0): relocation truncated to fit: R_NDS32_25_PCREL_RELA against `.text' nds32le-linux-ld: kernel/futex.o:(.fixup+0xb6): relocation truncated to fit: R_NDS32_25_PCREL_RELA against `.text' nds32le-linux-ld: kernel/futex.o:(.fixup+0xbc): relocation truncated to fit: R_NDS32_25_PCREL_RELA against `.text' nds32le-linux-ld: kernel/futex.o:(.fixup+0xc4): relocation truncated to fit: R_NDS32_25_PCREL_RELA against `.text' Makefile:1010: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Signed-off-by: Greentime Hu --- arch/nds32/include/asm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/nds32/include/asm/futex.h b/arch/nds32/include/asm/futex.h index eab5e84bd991..cb6cb91cfdf8 100644 --- a/arch/nds32/include/asm/futex.h +++ b/arch/nds32/include/asm/futex.h @@ -16,7 +16,7 @@ " .popsection\n" \ " .pushsection .fixup,\"ax\"\n" \ "4: move %0, " err_reg "\n" \ - " j 3b\n" \ + " b 3b\n" \ " .popsection" #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ -- cgit v1.2.3 From 1a381d4a0a9a0f999a13faaba22bf6b3fc80dcb9 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 27 Jun 2018 12:46:14 -0700 Subject: arm64: remove no-op -p linker flag Linking the ARM64 defconfig kernel with LLVM lld fails with the error: ld.lld: error: unknown argument: -p Makefile:1015: recipe for target 'vmlinux' failed Without this flag, the ARM64 defconfig kernel successfully links with lld and boots on Dragonboard 410c. After digging through binutils source and changelogs, it turns out that -p is only relevant to ancient binutils installations targeting 32-bit ARM. binutils accepts -p for AArch64 too, but it's always been undocumented and silently ignored. A comment in ld/emultempl/aarch64elf.em explains that it's "Only here for backwards compatibility". Since this flag is a no-op on ARM64, we can safely drop it. Acked-by: Will Deacon Reviewed-by: Nick Desaulniers Signed-off-by: Greg Hackmann Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 48158c550110..7976d2d242fa 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 -- cgit v1.2.3 From 523402fa9101090c91d2033b7ebdfdcf65880488 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 5 Jul 2018 14:37:52 -0700 Subject: MIPS: Fix ioremap() RAM check We currently attempt to check whether a physical address range provided to __ioremap() may be in use by the page allocator by examining the value of PageReserved for each page in the region - lowmem pages not marked reserved are presumed to be in use by the page allocator, and requests to ioremap them fail. The way we check this has been broken since commit 92923ca3aace ("mm: meminit: only set page reserved in the memblock region"), because memblock will typically not have any knowledge of non-RAM pages and therefore those pages will not have the PageReserved flag set. Thus when we attempt to ioremap a region outside of RAM we incorrectly fail believing that the region is RAM that may be in use. In most cases ioremap() on MIPS will take a fast-path to use the unmapped kseg1 or xkphys virtual address spaces and never hit this path, so the only way to hit it is for a MIPS32 system to attempt to ioremap() an address range in lowmem with flags other than _CACHE_UNCACHED. Perhaps the most straightforward way to do this is using ioremap_uncached_accelerated(), which is how the problem was discovered. Fix this by making use of walk_system_ram_range() to test the address range provided to __ioremap() against only RAM pages, rather than all lowmem pages. This means that if we have a lowmem I/O region, which is very common for MIPS systems, we're free to ioremap() address ranges within it. A nice bonus is that the test is no longer limited to lowmem. The approach here matches the way x86 performed the same test after commit c81c8a1eeede ("x86, ioremap: Speed up check for RAM pages") until x86 moved towards a slightly more complicated check using walk_mem_res() for unrelated reasons with commit 0e4c12b45aa8 ("x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages"). Signed-off-by: Paul Burton Reported-by: Serge Semin Tested-by: Serge Semin Fixes: 92923ca3aace ("mm: meminit: only set page reserved in the memblock region") Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.2+ Patchwork: https://patchwork.linux-mips.org/patch/19786/ --- arch/mips/mm/ioremap.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1986e09fb457..1601d90b087b 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,20 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, return error; } +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) +{ + unsigned long i; + + for (i = 0; i < nr_pages; i++) { + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + } + + return 0; +} + /* * Generic mapping function (not visible outside): */ @@ -116,8 +131,8 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags) { + unsigned long offset, pfn, last_pfn; struct vm_struct * area; - unsigned long offset; phys_addr_t last_addr; void * addr; @@ -137,18 +152,16 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long return (void __iomem *) CKSEG1ADDR(phys_addr); /* - * Don't allow anybody to remap normal RAM that we're using.. + * Don't allow anybody to remap RAM that may be allocated by the page + * allocator, since that could lead to races & data clobbering. */ - if (phys_addr < virt_to_phys(high_memory)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; + pfn = PFN_DOWN(phys_addr); + last_pfn = PFN_DOWN(last_addr); + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) { + WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", + &phys_addr, &last_addr); + return NULL; } /* -- cgit v1.2.3 From 1268ed0c474a5c8f165ef386f3310521b5e00e27 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Tue, 3 Jul 2018 16:01:55 -0700 Subject: x86/hyper-v: Fix the circular dependency in IPI enlightenment The IPI hypercalls depend on being able to map the Linux notion of CPU ID to the hypervisor's notion of the CPU ID. The array hv_vp_index[] provides this mapping. Code for populating this array depends on the IPI functionality. Break this circular dependency. [ tglx: Use a proper define instead of '-1' with a u32 variable as pointed out by Vitaly ] Fixes: 68bb7bfb7985 ("X86/Hyper-V: Enable IPI enlightenments") Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Cc: gregkh@linuxfoundation.org Cc: devel@linuxdriverproject.org Cc: olaf@aepfle.de Cc: apw@canonical.com Cc: jasowang@redhat.com Cc: hpa@zytor.com Cc: sthemmin@microsoft.com Cc: Michael.H.Kelley@microsoft.com Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180703230155.15160-1-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 5 +++++ arch/x86/hyperv/hv_init.c | 5 ++++- arch/x86/include/asm/mshyperv.h | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index f68855499391..402338365651 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -114,6 +114,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); } + if (nr_bank < 0) + goto ipi_mask_ex_done; if (!nr_bank) ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; @@ -158,6 +160,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) for_each_cpu(cur_cpu, mask) { vcpu = hv_cpu_number_to_vp_number(cur_cpu); + if (vcpu == VP_INVAL) + goto ipi_mask_done; + /* * This particular version of the IPI hypercall can * only target upto 64 CPUs. diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 4c431e1c1eff..1ff420217298 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -265,7 +265,7 @@ void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; - int cpuhp; + int cpuhp, i; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -293,6 +293,9 @@ void __init hyperv_init(void) if (!hv_vp_index) return; + for (i = 0; i < num_possible_cpus(); i++) + hv_vp_index[i] = VP_INVAL; + hv_vp_assist_page = kcalloc(num_possible_cpus(), sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 3cd14311edfa..5a7375ed5f7c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -9,6 +9,8 @@ #include #include +#define VP_INVAL U32_MAX + struct ms_hyperv_info { u32 features; u32 misc_features; @@ -20,7 +22,6 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; - /* * Generate the guest ID. */ @@ -281,6 +282,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, */ for_each_cpu(cpu, cpus) { vcpu = hv_cpu_number_to_vp_number(cpu); + if (vcpu == VP_INVAL) + return -1; vcpu_bank = vcpu / 64; vcpu_offset = vcpu % 64; __set_bit(vcpu_offset, (unsigned long *) -- cgit v1.2.3 From 818b7587b4d34e989ea6c042eeb8d50ffa5be13e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 27 Jun 2018 10:31:20 -0500 Subject: x86: irq_remapping: Move irq remapping mode enum The enum is currently defined in Intel-specific DMAR header file, but it is also used by APIC common code. Therefore, move it to a more appropriate interrupt-remapping common header file. This will also be used by subsequent patches. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Joerg Roedel Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- arch/x86/include/asm/irq_remapping.h | 5 +++++ include/linux/dmar.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 023b4a9fc846..5f26962eff42 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -33,6 +33,11 @@ enum irq_remap_cap { IRQ_POSTING_CAP = 0, }; +enum { + IRQ_REMAP_XAPIC_MODE, + IRQ_REMAP_X2APIC_MODE, +}; + struct vcpu_data { u64 pi_desc_addr; /* Physical address of PI Descriptor */ u32 vector; /* Guest vector of the interrupt */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e2433bc50210..843a41ba7e28 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -265,11 +265,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src) #define PDA_LOW_BIT 26 #define PDA_HIGH_BIT 32 -enum { - IRQ_REMAP_XAPIC_MODE, - IRQ_REMAP_X2APIC_MODE, -}; - /* Can't use the common MSI interrupt functions * since DMAR is not a pci device */ -- cgit v1.2.3 From 568cc2f07c8ea5f71a0486464bd9703e4671045f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 3 Jul 2018 06:24:20 +0300 Subject: ARM: dts: armada-38x: use the new thermal binding Commit 2f28e4c24b10e (thermal: armada: Clarify control registers accesses) introduced the new thermal binding. The new binding extends the second registers field size to 8. Switch to the new binding to fix thermal reading values. Without this change the fix for errata #132698 introduced in commit 8c0b888f661 (thermal: armada: Change sensors trim default value) has no effect. Cc: stable@vger.kernel.org # v4.16+ Reviewed-by: Miquel Raynal Signed-off-by: Baruch Siach Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-38x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 18edc9bc7927..929459c42760 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -547,7 +547,7 @@ thermal: thermal@e8078 { compatible = "marvell,armada380-thermal"; - reg = <0xe4078 0x4>, <0xe4074 0x4>; + reg = <0xe4078 0x4>, <0xe4070 0x8>; status = "okay"; }; -- cgit v1.2.3 From 0c1049dcb4ceec640d8bd797335bcbebdcab44d2 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 6 Jul 2018 22:15:00 +0200 Subject: ARM: pxa: irq: fix handling of ICMR registers in suspend/resume PXA3xx platforms have 56 interrupts that are stored in two ICMR registers. The code in pxa_irq_suspend() and pxa_irq_resume() however does a simple division by 32 which only leads to one register being saved at suspend and restored at resume time. The NAND interrupt setting, for instance, is lost. Fix this by using DIV_ROUND_UP() instead. Signed-off-by: Daniel Mack Signed-off-by: Robert Jarzmik --- arch/arm/mach-pxa/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 9c10248fadcc..4e8c2116808e 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -185,7 +185,7 @@ static int pxa_irq_suspend(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); saved_icmr[i] = __raw_readl(base + ICMR); @@ -204,7 +204,7 @@ static void pxa_irq_resume(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); __raw_writel(saved_icmr[i], base + ICMR); -- cgit v1.2.3 From 15279df6f26cf2013d713904b4a0c957ae8abb96 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 6 Jul 2018 23:50:03 +0200 Subject: x86/mtrr: Don't copy out-of-bounds data in mtrr_write Don't access the provided buffer out of bounds - this can cause a kernel out-of-bounds read when invoked through sys_splice() or other things that use kernel_write()/__kernel_write(). Fixes: 7f8ec5a4f01a ("x86/mtrr: Convert to use strncpy_from_user() helper") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Cc: Andy Shevchenko Cc: "H. Peter Anvin" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180706215003.156702-1-jannh@google.com --- arch/x86/kernel/cpu/mtrr/if.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 4021d3859499..40eee6cc4124 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -106,7 +106,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) memset(line, 0, LINE_SIZE); - length = strncpy_from_user(line, buf, LINE_SIZE - 1); + len = min_t(size_t, len, LINE_SIZE - 1); + length = strncpy_from_user(line, buf, len); if (length < 0) return length; -- cgit v1.2.3 From ec58ba16e174d7ca24c8955a21cd0a53e0c32fdf Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel Date: Fri, 6 Jul 2018 11:32:37 +0100 Subject: ARC: [plat-hsdk]: Configure APB GPIO controller on ARC HSDK platform In case of HSDK we have intermediate INTC in for of DW APB GPIO controller which is used as a de-bounce logic for interrupt wires that come from outside the board. We cannot use existing "irq-dw-apb-ictl" driver here because all input lines are routed to corresponding output lines but not muxed into one line (this is configured in RTL and we cannot change this in software). But even if we add such a feature to "irq-dw-apb-ictl" driver that won't benefit us as higher-level INTC (in case of HSDK it is IDU) anyways has per-input control so adding fully-controller intermediate INTC will only bring some overhead on interrupt processing but no other benefits. Thus we just do one-time configuration of DW APB GPIO controller and forget about it. Based on implementation available on arch/arc/plat-axs10x/axs10x.c file. Acked-by: Alexey Brodkin Signed-off-by: Gustavo Pimentel Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/platform.c | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'arch') diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 2958aedb649a..2588b842407c 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -42,6 +42,66 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) #define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) +#define HSDK_GPIO_INTC (ARC_PERIPHERAL_BASE + 0x3000) + +static void __init hsdk_enable_gpio_intc_wire(void) +{ + /* + * Peripherals on CPU Card are wired to cpu intc via intermediate + * DW APB GPIO blocks (mainly for debouncing) + * + * --------------------- + * | snps,archs-intc | + * --------------------- + * | + * ---------------------- + * | snps,archs-idu-intc | + * ---------------------- + * | | | | | + * | [eth] [USB] [... other peripherals] + * | + * ------------------- + * | snps,dw-apb-intc | + * ------------------- + * | | | | + * [Bt] [HAPS] [... other peripherals] + * + * Current implementation of "irq-dw-apb-ictl" driver doesn't work well + * with stacked INTCs. In particular problem happens if its master INTC + * not yet instantiated. See discussion here - + * https://lkml.org/lkml/2015/3/4/755 + * + * So setup the first gpio block as a passive pass thru and hide it from + * DT hardware topology - connect intc directly to cpu intc + * The GPIO "wire" needs to be init nevertheless (here) + * + * One side adv is that peripheral interrupt handling avoids one nested + * intc ISR hop + * + * According to HSDK User's Manual [1], "Table 2 Interrupt Mapping" + * we have the following GPIO input lines used as sources of interrupt: + * - GPIO[0] - Bluetooth interrupt of RS9113 module + * - GPIO[2] - HAPS interrupt (on HapsTrak 3 connector) + * - GPIO[3] - Audio codec (MAX9880A) interrupt + * - GPIO[8-23] - Available on Arduino and PMOD_x headers + * For now there's no use of Arduino and PMOD_x headers in Linux + * use-case so we only enable lines 0, 2 and 3. + * + * [1] https://github.com/foss-for-synopsys-dwc-arc-processors/ARC-Development-Systems-Forum/wiki/docs/ARC_HSDK_User_Guide.pdf + */ +#define GPIO_INTEN (HSDK_GPIO_INTC + 0x30) +#define GPIO_INTMASK (HSDK_GPIO_INTC + 0x34) +#define GPIO_INTTYPE_LEVEL (HSDK_GPIO_INTC + 0x38) +#define GPIO_INT_POLARITY (HSDK_GPIO_INTC + 0x3c) +#define GPIO_INT_CONNECTED_MASK 0x0d + + iowrite32(0xffffffff, (void __iomem *) GPIO_INTMASK); + iowrite32(~GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTMASK); + iowrite32(0x00000000, (void __iomem *) GPIO_INTTYPE_LEVEL); + iowrite32(0xffffffff, (void __iomem *) GPIO_INT_POLARITY); + iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); +} + static void __init hsdk_init_early(void) { /* @@ -62,6 +122,8 @@ static void __init hsdk_init_early(void) * minimum possible div-by-2. */ iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); + + hsdk_enable_gpio_intc_wire(); } static const char *hsdk_compat[] __initconst = { -- cgit v1.2.3 From e8708786d4fe21c043d38d760f768949a3d71185 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Jun 2018 17:22:05 +0300 Subject: ARC: Improve cmpxchg syscall implementation This is used in configs lacking hardware atomics to emulate atomic r-m-w for user space, implemented by disabling preemption in kernel. However there are issues in current implementation: 1. Process not terminated if invalid user pointer passed: i.e. __get_user() failed. 2. The reason for this patch was __put_user() failure not being handled either, specifically for the COW break scenario. The zero page is initially wired up and read from __get_user() succeeds. A subsequent write by __put_user() induces a Protection Violation, but COW can't finish as Linux page fault handler is disabled due to preempt disable. And what's worse is we silently return the stale value to user space. Fix this specific case by re-enabling preemption and explicitly fixing up the fault and retrying the whole sequence over. Cc: Max Filippov Cc: linux-arch@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Peter Zijlstra Signed-off-by: Vineet Gupta [vgupta: rewrote the changelog] --- arch/arc/kernel/process.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 5ac3b547453f..4674541eba3f 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -47,7 +47,8 @@ SYSCALL_DEFINE0(arc_gettls) SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) { struct pt_regs *regs = current_pt_regs(); - int uval = -EFAULT; + u32 uval; + int ret; /* * This is only for old cores lacking LLOCK/SCOND, which by defintion @@ -60,23 +61,47 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) /* Z indicates to userspace if operation succeded */ regs->status32 &= ~STATUS_Z_MASK; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; + ret = access_ok(VERIFY_WRITE, uaddr, sizeof(*uaddr)); + if (!ret) + goto fail; +again: preempt_disable(); - if (__get_user(uval, uaddr)) - goto done; + ret = __get_user(uval, uaddr); + if (ret) + goto fault; - if (uval == expected) { - if (!__put_user(new, uaddr)) - regs->status32 |= STATUS_Z_MASK; - } + if (uval != expected) + goto out; -done: - preempt_enable(); + ret = __put_user(new, uaddr); + if (ret) + goto fault; + + regs->status32 |= STATUS_Z_MASK; +out: + preempt_enable(); return uval; + +fault: + preempt_enable(); + + if (unlikely(ret != -EFAULT)) + goto fail; + + down_read(¤t->mm->mmap_sem); + ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr, + FAULT_FLAG_WRITE, NULL); + up_read(¤t->mm->mmap_sem); + + if (likely(!ret)) + goto again; + +fail: + force_sig(SIGSEGV, current); + return ret; } #ifdef CONFIG_ISA_ARCV2 -- cgit v1.2.3 From 29c2068fdaa51c89af31ea6b83dd1108b2349816 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 26 Jun 2018 17:28:51 +0200 Subject: ARC: configs: remove no longer needed CONFIG_DEVPTS_MULTIPLE_INSTANCES Since commit eedf265aa003 ("devpts: Make each mount of devpts an independent filesystem.") CONFIG_DEVPTS_MULTIPLE_INSTANCES isn't needed in the defconfig anymore. Signed-off-by: Anders Roxell Signed-off-by: Vineet Gupta --- arch/arc/configs/tb10x_defconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 1aca2e8fd1ba..2cc87f909747 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -56,7 +56,6 @@ CONFIG_STMMAC_ETH=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y -- cgit v1.2.3 From 64234961c145606b36eaa82c47b11be842b21049 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 6 Jun 2018 15:59:38 +0300 Subject: ARC: configs: Remove CONFIG_INITRAMFS_SOURCE from defconfigs We used to have pre-set CONFIG_INITRAMFS_SOURCE with local path to intramfs in ARC defconfigs. This was quite convenient for in-house development but not that convenient for newcomers who obviusly don't have folders like "arc_initramfs" next to the Linux source tree. Which leads to quite surprising failure of defconfig building: ------------------------------->8----------------------------- ../scripts/gen_initramfs_list.sh: Cannot open '../../arc_initramfs_hs/' ../usr/Makefile:57: recipe for target 'usr/initramfs_data.cpio.gz' failed make[2]: *** [usr/initramfs_data.cpio.gz] Error 1 ------------------------------->8----------------------------- So now when more and more people start to deal with our defconfigs let's make their life easier with removal of CONFIG_INITRAMFS_SOURCE. Signed-off-by: Alexey Brodkin Cc: Kevin Hilman Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/configs/axs101_defconfig | 1 - arch/arc/configs/axs103_defconfig | 1 - arch/arc/configs/axs103_smp_defconfig | 1 - arch/arc/configs/haps_hs_defconfig | 1 - arch/arc/configs/haps_hs_smp_defconfig | 1 - arch/arc/configs/hsdk_defconfig | 1 - arch/arc/configs/nsim_700_defconfig | 1 - arch/arc/configs/nsim_hs_defconfig | 1 - arch/arc/configs/nsim_hs_smp_defconfig | 1 - arch/arc/configs/nsimosci_defconfig | 1 - arch/arc/configs/nsimosci_hs_defconfig | 1 - arch/arc/configs/nsimosci_hs_smp_defconfig | 1 - 12 files changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 09f85154c5a4..a635ea972304 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 09fed3ef22b6..aa507e423075 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index ea2f6d817d1a..eba07f468654 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index ab231c040efe..098b19fbaa51 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EXPERT=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index cf449cbf440d..0104c404d897 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1b54c72f4296..6491be0ddbc9 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 31c2c70b34a1..99e05cf63fca 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index a578c721d50f..0dc4f9b737e7 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index 37d7395f3272..be3c30a15e54 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -9,7 +9,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 1e1470e2a7f0..3a74b9b21772 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 084a6e42685b..ea2834b4dc1d 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -11,7 +11,6 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index f36d47990415..80a5a1b4924b 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -9,7 +9,6 @@ CONFIG_IKCONFIG_PROC=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/" CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_KPROBES=y -- cgit v1.2.3 From ca1147fc2487335e9d1d7a931996eae176863a4c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 27 Jun 2018 16:00:32 -0700 Subject: ARC: [arcompact] entry.S: minor code movement This is a non functional code changw, which moves r25 restore from macro into the caller of macro Signed-off-by: Vineet Gupta --- arch/arc/include/asm/entry-compact.h | 6 ++++++ arch/arc/include/asm/entry.h | 3 --- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index ec36d5b6d435..29f3988c9424 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -234,6 +234,9 @@ POP gp RESTORE_R12_TO_R0 +#ifdef CONFIG_ARC_CURR_IN_REG + ld r25, [sp, 12] +#endif ld sp, [sp] /* restore original sp */ /* orig_r0, ECR, user_r25 skipped automatically */ .endm @@ -315,6 +318,9 @@ POP gp RESTORE_R12_TO_R0 +#ifdef CONFIG_ARC_CURR_IN_REG + ld r25, [sp, 12] +#endif ld sp, [sp] /* restore original sp */ /* orig_r0, ECR, user_r25 skipped automatically */ .endm diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 51597f344a62..302b0db8ea2b 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -86,9 +86,6 @@ POP r1 POP r0 -#ifdef CONFIG_ARC_CURR_IN_REG - ld r25, [sp, 12] -#endif .endm /*-------------------------------------------------------------- -- cgit v1.2.3 From 6e3761145a9ba3ce267c330b6bff51cf6a057b06 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 28 Jun 2018 16:59:14 -0700 Subject: ARC: Fix CONFIG_SWAP swap was broken on ARC due to silly copy-paste issue. We encode offset from swapcache page in __swp_entry() as (off << 13) but were not decoding back in __swp_offset() as (off >> 13) - it was still (off << 13). This finally fixes swap usage on ARC. | # mkswap /dev/sda2 | | # swapon -a -e /dev/sda2 | Adding 500728k swap on /dev/sda2. Priority:-2 extents:1 across:500728k | | # free | total used free shared buffers cached | Mem: 765104 13456 751648 4736 8 4736 | -/+ buffers/cache: 8712 756392 | Swap: 500728 0 500728 Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 08fe33830d4b..77676e18da69 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -379,7 +379,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* Decode a PTE containing swap "identifier "into constituents */ #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) -#define __swp_offset(pte_lookalike) ((pte_lookalike).val << 13) +#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) /* NOPs, to keep generic kernel happy */ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -- cgit v1.2.3 From 96f95a17c1cfe65a002e525114d96616e91a8f2d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 9 Jul 2018 13:09:56 -0700 Subject: Revert "arm64: Use aarch64elf and aarch64elfb emulation mode variants" This reverts commit 38fc4248677552ce35efc09902fdcb06b61d7ef9. Distributions such as Fedora and Debian do not package the ELF linker scripts with their toolchains, resulting in kernel build failures such as: | CHK include/generated/compile.h | LD [M] arch/arm64/crypto/sha512-ce.o | aarch64-linux-gnu-ld: cannot open linker script file ldscripts/aarch64elf.xr: No such file or directory | make[1]: *** [scripts/Makefile.build:530: arch/arm64/crypto/sha512-ce.o] Error 1 | make: *** [Makefile:1029: arch/arm64/crypto] Error 2 Revert back to the linux targets for now, adding a comment to the Makefile so we don't accidentally break this in the future. Cc: Paul Kocialkowski Cc: Fixes: 38fc42486775 ("arm64: Use aarch64elf and aarch64elfb emulation mode variants") Tested-by: Kevin Hilman Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7976d2d242fa..e7101b19d590 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,13 +60,15 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB -LDFLAGS += -EB -maarch64elfb +# We must use the linux target here, since distributions don't tend to package +# the ELF linker scripts with binutils, and this results in a build failure. +LDFLAGS += -EB -maarch64linuxb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL -LDFLAGS += -EL -maarch64elf +LDFLAGS += -EL -maarch64linux # See comment above UTS_MACHINE := aarch64 endif -- cgit v1.2.3 From cea394772d3c41d04cb71a032f6ed878392bd134 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 18 Jun 2018 14:33:03 +0100 Subject: ARM: 8775/1: NOMMU: Use instr_sync instead of plain isb in common code Greg reported that commit 3c24121039c9d ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") is causing breakage for the old Versatile platform in no-MMU mode (with out-of-tree patches): AS arch/arm/kernel/head-nommu.o arch/arm/kernel/head-nommu.S: Assembler messages: arch/arm/kernel/head-nommu.S:180: Error: selected processor does not support `isb' in ARM mode scripts/Makefile.build:417: recipe for target 'arch/arm/kernel/head-nommu.o' failed make[2]: *** [arch/arm/kernel/head-nommu.o] Error 1 Makefile:1034: recipe for target 'arch/arm/kernel' failed make[1]: *** [arch/arm/kernel] Error 2 Since the code is common for all NOMMU builds usage of the isb was a bad idea (please, note that isb also used in MPU related code which is fine because MPU has dependency on CPU_V7/CPU_V7M), instead use more robust instr_sync assembler macro. Fixes: 3c24121039c9 ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") Reported-by: Greg Ungerer Tested-by: Greg Ungerer Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index dd546d65a383..7a9b86978ee1 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -177,7 +177,7 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) bic r0, r0, #CR_I #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg - isb + instr_sync #elif defined (CONFIG_CPU_V7M) #ifdef CONFIG_ARM_MPU ldreq r3, [r12, MPU_CTRL] -- cgit v1.2.3 From e296701800f30d260a66f8aa1971b5b1bc3d2f81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jul 2018 11:02:35 +0200 Subject: efi/x86: Fix mixed mode reboot loop by removing pointless call to PciIo->Attributes() Hans de Goede reported that his mixed EFI mode Bay Trail tablet would not boot at all any more, but enter a reboot loop without any logs printed by the kernel. Unbreak 64-bit Linux/x86 on 32-bit UEFI: When it was first introduced, the EFI stub code that copies the contents of PCI option ROMs originally only intended to do so if the EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM attribute was *not* set. The reason was that the UEFI spec permits PCI option ROM images to be provided by the platform directly, rather than via the ROM BAR, and in this case, the OS can only access them at runtime if they are preserved at boot time by copying them from the areas described by PciIo->RomImage and PciIo->RomSize. However, it implemented this check erroneously, as can be seen in commit: dd5fc854de5fd ("EFI: Stash ROMs if they're not in the PCI BAR") which introduced: if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM) continue; and given that the numeric value of EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM is 0x4000, this condition never becomes true, and so the option ROMs were copied unconditionally. This was spotted and 'fixed' by commit: 886d751a2ea99a160 ("x86, efi: correct precedence of operators in setup_efi_pci") but inadvertently inverted the logic at the same time, defeating the purpose of the code, since it now only preserves option ROM images that can be read from the ROM BAR as well. Unsurprisingly, this broke some systems, and so the check was removed entirely in the following commit: 739701888f5d ("x86, efi: remove attribute check from setup_efi_pci") It is debatable whether this check should have been included in the first place, since the option ROM image provided to the UEFI driver by the firmware may be different from the one that is actually present in the card's flash ROM, and so whatever PciIo->RomImage points at should be preferred regardless of whether the attribute is set. As this was the only use of the attributes field, we can remove the call to PciIo->Attributes() entirely, which is especially nice because its prototype involves uint64_t type by-value arguments which the EFI mixed mode has trouble dealing with. Any mixed mode system with PCI is likely to be affected. Tested-by: Wilfried Klaebe Tested-by: Hans de Goede Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180711090235.9327-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index e57665b4ba1c..e98522ea6f09 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -114,18 +114,12 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) struct pci_setup_rom *rom = NULL; efi_status_t status; unsigned long size; - uint64_t attributes, romsize; + uint64_t romsize; void *romimage; - status = efi_call_proto(efi_pci_io_protocol, attributes, pci, - EfiPciIoAttributeOperationGet, 0ULL, - &attributes); - if (status != EFI_SUCCESS) - return status; - /* - * Some firmware images contain EFI function pointers at the place where the - * romimage and romsize fields are supposed to be. Typically the EFI + * Some firmware images contain EFI function pointers at the place where + * the romimage and romsize fields are supposed to be. Typically the EFI * code is mapped at high addresses, translating to an unrealistically * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. -- cgit v1.2.3 From 2fd8eb4ad87104c54800ef3cea498c92eb15c78a Mon Sep 17 00:00:00 2001 From: Yandong Zhao Date: Wed, 11 Jul 2018 19:06:28 +0800 Subject: arm64: neon: Fix function may_use_simd() return error status It does not matter if the caller of may_use_simd() migrates to another cpu after the call, but it is still important that the kernel_neon_busy percpu instance that is read matches the cpu the task is running on at the time of the read. This means that raw_cpu_read() is not sufficient. kernel_neon_busy may appear true if the caller migrates during the execution of raw_cpu_read() and the next task to be scheduled in on the initial cpu calls kernel_neon_begin(). This patch replaces raw_cpu_read() with this_cpu_read() to protect against this race. Cc: Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Acked-by: Ard Biesheuvel Reviewed-by: Dave Martin Reviewed-by: Mark Rutland Signed-off-by: Yandong Zhao Signed-off-by: Will Deacon --- arch/arm64/include/asm/simd.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index fa8b3fe932e6..6495cc51246f 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -29,20 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * The raw_cpu_read() is racy if called with preemption enabled. - * This is not a bug: kernel_neon_busy is only set when - * preemption is disabled, so we cannot migrate to another CPU - * while it is set, nor can we migrate to a CPU where it is set. - * So, if we find it clear on some CPU then we're guaranteed to - * find it clear on any CPU we could migrate to. - * - * If we are in between kernel_neon_begin()...kernel_neon_end(), - * the flag will be set, but preemption is also disabled, so we - * can't migrate to another CPU and spuriously see it become - * false. + * kernel_neon_busy is only set while preemption is disabled, + * and is clear whenever preemption is enabled. Since + * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * cannot change under our feet -- if it's set we cannot be + * migrated, and if it's clear we cannot be migrated to a CPU + * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !raw_cpu_read(kernel_neon_busy); + !this_cpu_read(kernel_neon_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ -- cgit v1.2.3 From 93312b6da4df31e4102ce5420e6217135a16c7ea Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 11 Jul 2018 10:42:20 -0700 Subject: ARC: mm: allow mprotect to make stack mappings executable mprotect(EXEC) was failing for stack mappings as default vm flags was missing MAYEXEC. This was triggered by glibc test suite nptl/tst-execstack testcase What is surprising is that despite running LTP for years on, we didn't catch this issue as it lacks a directed test case. gcc dejagnu tests with nested functions also requiring exec stack work fine though because they rely on the GNU_STACK segment spit out by compiler and handled in kernel elf loader. This glibc case is different as the stack is non exec to begin with and a dlopen of shared lib with GNU_STACK segment triggers the exec stack proceedings using a mprotect(PROT_EXEC) which was broken. CC: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 109baa06831c..09ddddf71cc5 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -105,7 +105,7 @@ typedef pte_t * pgtable_t; #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) /* Default Permissions for stack/heaps pages (Non Executable) */ -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE) +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define WANT_PAGE_VIRTUAL 1 -- cgit v1.2.3 From b4c7e2bd2eb4764afe3af9409ff3b1b87116fa30 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 10 Jul 2018 08:22:40 +0100 Subject: ARM: 8780/1: ftrace: Only set kernel memory back to read-only after boot Dynamic ftrace requires modifying the code segments that are usually set to read-only. To do this, a per arch function is called both before and after the ftrace modifications are performed. The "before" function will set kernel code text to read-write to allow for ftrace to make the modifications, and the "after" function will set the kernel code text back to "read-only" to keep the kernel code text protected. The issue happens when dynamic ftrace is tested at boot up. The test is done before the kernel code text has been set to read-only. But the "before" and "after" calls are still performed. The "after" call will change the kernel code text to read-only prematurely, and other boot code that expects this code to be read-write will fail. The solution is to add a variable that is set when the kernel code text is expected to be converted to read-only, and make the ftrace "before" and "after" calls do nothing if that variable is not yet set. This is similar to the x86 solution from commit 162396309745 ("ftrace, x86: make kernel text writable only for conversions"). Link: http://lkml.kernel.org/r/20180620212906.24b7b66e@vmware.local.home Reported-by: Stefan Agner Tested-by: Stefan Agner Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Russell King --- arch/arm/mm/init.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c186474422f3..0cc8e04295a4 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -736,20 +736,29 @@ static int __mark_rodata_ro(void *unused) return 0; } +static int kernel_set_to_readonly __read_mostly; + void mark_rodata_ro(void) { + kernel_set_to_readonly = 1; stop_machine(__mark_rodata_ro, NULL, NULL); debug_checkwx(); } void set_kernel_text_rw(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false, current->active_mm); } void set_kernel_text_ro(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, current->active_mm); } -- cgit v1.2.3 From e69b5d308da72cbf4e7911c3979f9a46d28532af Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 2 Jul 2018 12:00:18 +0200 Subject: xen: remove global bit from __default_kernel_pte_mask for pv guests When removing the global bit from __supported_pte_mask do the same for __default_kernel_pte_mask in order to avoid the WARN_ONCE() in check_pgprot() when setting a kernel pte before having called init_mem_mapping(). Cc: # 4.17 Reported-by: Michael Young Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 8d4e2e1ae60b..4816b6f82a9a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1229,6 +1229,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Prevent unwanted bits from being set in PTEs. */ __supported_pte_mask &= ~_PAGE_GLOBAL; + __default_kernel_pte_mask &= ~_PAGE_GLOBAL; /* * Prevent page tables from being allocated in highmem, even -- cgit v1.2.3 From 2f8b5b21830aea95989a6e67d8a971297272a086 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 10 Jul 2018 14:47:25 -0500 Subject: ARM: DRA7/OMAP5: Enable ACTLR[0] (Enable invalidates of BTB) for secondary cores Call secure services to enable ACTLR[0] (Enable invalidates of BTB with ICIALLU) when branch hardening is enabled for kernel. On GP devices OMAP5/DRA7, there is no possibility to update secure side since "secure world" is ROM and there are no override mechanisms possible. On HS devices, appropriate PPA should do the workarounds as well. However, the configuration is only done for secondary core, since it is expected that firmware/bootloader will have enabled the required configuration for the primary boot core (note: bootloaders typically will NOT enable secondary processors, since it has no need to do so). Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-smp.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 69df3620eca5..1c73694c871a 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -109,6 +109,45 @@ void omap5_erratum_workaround_801819(void) static inline void omap5_erratum_workaround_801819(void) { } #endif +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +/* + * Configure ACR and enable ACTLR[0] (Enable invalidates of BTB with + * ICIALLU) to activate the workaround for secondary Core. + * NOTE: it is assumed that the primary core's configuration is done + * by the boot loader (kernel will detect a misconfiguration and complain + * if this is not done). + * + * In General Purpose(GP) devices, ACR bit settings can only be done + * by ROM code in "secure world" using the smc call and there is no + * option to update the "firmware" on such devices. This also works for + * High security(HS) devices, as a backup option in case the + * "update" is not done in the "security firmware". + */ +static void omap5_secondary_harden_predictor(void) +{ + u32 acr, acr_mask; + + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + + /* + * ACTLR[0] (Enable invalidates of BTB with ICIALLU) + */ + acr_mask = BIT(0); + + /* Do we already have it done.. if yes, skip expensive smc */ + if ((acr & acr_mask) == acr_mask) + return; + + acr |= acr_mask; + omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr); + + pr_debug("%s: ARM ACR setup for CVE_2017_5715 applied on CPU%d\n", + __func__, smp_processor_id()); +} +#else +static inline void omap5_secondary_harden_predictor(void) { } +#endif + static void omap4_secondary_init(unsigned int cpu) { /* @@ -131,6 +170,8 @@ static void omap4_secondary_init(unsigned int cpu) set_cntfreq(); /* Configure ACR to disable streaming WA for 801819 */ omap5_erratum_workaround_801819(); + /* Enable ACR to allow for ICUALLU workaround */ + omap5_secondary_harden_predictor(); } /* -- cgit v1.2.3 From 923847413f7316b5ced3491769b3fefa6c56a79a Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 11 Jul 2018 12:54:54 -0500 Subject: ARM: dts: am3517.dtsi: Disable reference to OMAP3 OTG controller The AM3517 has a different OTG controller location than the OMAP3, which is included from omap3.dtsi. This results in a hwmod error. Since the AM3517 has a different OTG controller address, this patch disabes one that is isn't available. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517.dtsi | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index 4b6062b631b1..23ea381d363f 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -91,6 +91,11 @@ }; }; +/* Table Table 5-79 of the TRM shows 480ab000 is reserved */ +&usb_otg_hs { + status = "disabled"; +}; + &iva { status = "disabled"; }; -- cgit v1.2.3 From 0ce0bba4e5e0eb9b753bb821785de5d23c494392 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 12 Jul 2018 17:40:34 +0200 Subject: xen: setup pv irq ops vector earlier Setting pv_irq_ops for Xen PV domains should be done as early as possible in order to support e.g. very early printk() usage. The same applies to xen_vcpu_info_reset(0), as it is needed for the pv irq ops. Move the call of xen_setup_machphys_mapping() after initializing the pv functions as it contains a WARN_ON(), too. Remove the no longer necessary conditional in xen_init_irq_ops() from PVH V1 times to make clear this is a PV only function. Cc: # 4.14 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 24 +++++++++++------------- arch/x86/xen/irq.c | 4 +--- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4816b6f82a9a..439a94bf89ad 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1207,12 +1207,20 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_features(); - xen_setup_machphys_mapping(); - /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops.patch = paravirt_patch_default; pv_cpu_ops = xen_cpu_ops; + xen_init_irq_ops(); + + /* + * Setup xen_vcpu early because it is needed for + * local_irq_disable(), irqs_disabled(), e.g. in printk(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); x86_platform.get_nmi_reason = xen_get_nmi_reason; @@ -1225,6 +1233,7 @@ asmlinkage __visible void __init xen_start_kernel(void) * Set up some pagetable state before starting to set any ptes. */ + xen_setup_machphys_mapping(); xen_init_mmu_ops(); /* Prevent unwanted bits from being set in PTEs. */ @@ -1250,20 +1259,9 @@ asmlinkage __visible void __init xen_start_kernel(void) get_cpu_cap(&boot_cpu_data); x86_configure_nx(); - xen_init_irq_ops(); - /* Let's presume PV guests always boot on vCPU with id 0. */ per_cpu(xen_vcpu_id, 0) = 0; - /* - * Setup xen_vcpu early because idt_setup_early_handler needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - idt_setup_early_handler(); xen_init_capabilities(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 74179852e46c..7515a19fd324 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { void __init xen_init_irq_ops(void) { - /* For PVH we use default pv_irq_ops settings. */ - if (!xen_feature(XENFEAT_hvm_callback_vector)) - pv_irq_ops = xen_irq_ops; + pv_irq_ops = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } -- cgit v1.2.3 From fa8cbda88db12e632a8987c94b66f5caf25bcec4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Jul 2018 16:59:09 -0700 Subject: x86/purgatory: add missing FORCE to Makefile target - Build the kernel without the fix - Add some flag to the purgatories KBUILD_CFLAGS,I used -fno-asynchronous-unwind-tables - Re-build the kernel When you look at makes output you see that sha256.o is not re-build in the last step. Also readelf -S still shows the .eh_frame section for sha256.o. With the fix sha256.o is rebuilt in the last step. Without FORCE make does not detect changes only made to the command line options. So object files might not be re-built even when they should be. Fix this by adding FORCE where it is missing. Link: http://lkml.kernel.org/r/20180704110044.29279-2-prudo@linux.ibm.com Fixes: df6f2801f511 ("kernel/kexec_file.c: move purgatories sha256 to common code") Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: Ingo Molnar Cc: Thomas Gleixner Cc: [4.17+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 2e9ee023e6bc..81a8e33115ad 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib -- cgit v1.2.3 From 2c991e408df6a407476dbc453d725e1e975479e7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 14 Jul 2018 12:58:07 -0700 Subject: x86/events/intel/ds: Fix bts_interrupt_threshold alignment Markus reported that BTS is sporadically missing the tail of the trace in the perf_event data buffer: [decode error (1): instruction overflow] shown in GDB; and bisected it to the conversion of debug_store to PTI. A little "optimization" crept into alloc_bts_buffer(), which mistakenly placed bts_interrupt_threshold away from the 24-byte record boundary. Intel SDM Vol 3B 17.4.9 says "This address must point to an offset from the BTS buffer base that is a multiple of the BTS record size." Revert "max" from a byte count to a record count, to calculate the bts_interrupt_threshold correctly: which turns out to fix problem seen. Fixes: c1961a4631da ("x86/events/intel/ds: Map debug buffers in cpu_entry_area") Reported-and-tested-by: Markus T Metzger Signed-off-by: Hugh Dickins Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Dave Hansen Cc: Stephane Eranian Cc: stable@vger.kernel.org # v4.14+ Link: https://lkml.kernel.org/r/alpine.LSU.2.11.1807141248290.1614@eggly.anvils --- arch/x86/events/intel/ds.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8a10a045b57b..8cf03f101938 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -408,9 +408,11 @@ static int alloc_bts_buffer(int cpu) ds->bts_buffer_base = (unsigned long) cea; ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); - ds->bts_absolute_maximum = ds->bts_buffer_base + max; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + (max / 16) * BTS_RECORD_SIZE; return 0; } -- cgit v1.2.3 From cd28325249a1ca0d771557ce823e0308ad629f98 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Jun 2018 14:04:37 +0200 Subject: KVM: VMX: support MSR_IA32_ARCH_CAPABILITIES as a feature MSR This lets userspace read the MSR_IA32_ARCH_CAPABILITIES and check that all requested features are available on the host. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 71e7cda6d014..b91c1e1ff459 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1092,6 +1092,7 @@ static u32 msr_based_features[] = { MSR_F10H_DECFG, MSR_IA32_UCODE_REV, + MSR_IA32_ARCH_CAPABILITIES, }; static unsigned int num_msr_based_features; @@ -1100,7 +1101,8 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { case MSR_IA32_UCODE_REV: - rdmsrl(msr->index, msr->data); + case MSR_IA32_ARCH_CAPABILITIES: + rdmsrl_safe(msr->index, &msr->data); break; default: if (kvm_x86_ops->get_msr_feature(msr)) -- cgit v1.2.3 From b062b794c7831a70bda4dfac202c1a9418e06ac0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 11 Jul 2018 19:37:18 +0200 Subject: x86/kvm/vmx: don't read current->thread.{fs,gs}base of legacy tasks When we switched from doing rdmsr() to reading FS/GS base values from current->thread we completely forgot about legacy 32-bit userspaces which we still support in KVM (why?). task->thread.{fsbase,gsbase} are only synced for 64-bit processes, calling save_fsgs_for_kvm() and using its result from current is illegal for legacy processes. There's no ARCH_SET_FS/GS prctls for legacy applications. Base MSRs are, however, not always equal to zero. Intel's manual says (3.4.4 Segment Loading Instructions in IA-32e Mode): "In order to set up compatibility mode for an application, segment-load instructions (MOV to Sreg, POP Sreg) work normally in 64-bit mode. An entry is read from the system descriptor table (GDT or LDT) and is loaded in the hidden portion of the segment register. ... The hidden descriptor register fields for FS.base and GS.base are physically mapped to MSRs in order to load all address bits supported by a 64-bit implementation. " The issue was found by strace test suite where 32-bit ioctl_kvm_run test started segfaulting. Reported-by: Dmitry V. Levin Bisected-by: Masatake YAMATO Fixes: 42b933b59721 ("x86/kvm/vmx: read MSR_{FS,KERNEL_GS}_BASE from current->thread") Cc: stable@vger.kernel.org Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 40aa29204baf..12ed6a8f6287 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2365,6 +2365,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); #ifdef CONFIG_X86_64 int cpu = raw_smp_processor_id(); + unsigned long fs_base, kernel_gs_base; #endif int i; @@ -2380,12 +2381,20 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; #ifdef CONFIG_X86_64 - save_fsgs_for_kvm(); - vmx->host_state.fs_sel = current->thread.fsindex; - vmx->host_state.gs_sel = current->thread.gsindex; -#else - savesegment(fs, vmx->host_state.fs_sel); - savesegment(gs, vmx->host_state.gs_sel); + if (likely(is_64bit_mm(current->mm))) { + save_fsgs_for_kvm(); + vmx->host_state.fs_sel = current->thread.fsindex; + vmx->host_state.gs_sel = current->thread.gsindex; + fs_base = current->thread.fsbase; + kernel_gs_base = current->thread.gsbase; + } else { +#endif + savesegment(fs, vmx->host_state.fs_sel); + savesegment(gs, vmx->host_state.gs_sel); +#ifdef CONFIG_X86_64 + fs_base = read_msr(MSR_FS_BASE); + kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); + } #endif if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); @@ -2405,10 +2414,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) savesegment(ds, vmx->host_state.ds_sel); savesegment(es, vmx->host_state.es_sel); - vmcs_writel(HOST_FS_BASE, current->thread.fsbase); + vmcs_writel(HOST_FS_BASE, fs_base); vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu)); - vmx->msr_host_kernel_gs_base = current->thread.gsbase; + vmx->msr_host_kernel_gs_base = kernel_gs_base; if (is_long_mode(&vmx->vcpu)) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else -- cgit v1.2.3 From 0b88abdc3f964c28ec03bc69eb17cb6b3b034564 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 May 2018 16:00:02 -0700 Subject: kvm: nVMX: Restore exit qual for VM-entry failure due to MSR loading This exit qualification was inadvertently dropped when the two VM-entry failure blocks were coalesced. Fixes: e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest") Signed-off-by: Jim Mattson Reviewed-by: Krish Sadhukhan Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 12ed6a8f6287..ba981459d706 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11431,7 +11431,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - u32 msr_entry_idx; u32 exit_qual; int r; @@ -11453,10 +11452,10 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) nested_get_vmcs12_pages(vcpu, vmcs12); r = EXIT_REASON_MSR_LOAD_FAIL; - msr_entry_idx = nested_vmx_load_msr(vcpu, - vmcs12->vm_entry_msr_load_addr, - vmcs12->vm_entry_msr_load_count); - if (msr_entry_idx) + exit_qual = nested_vmx_load_msr(vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (exit_qual) goto fail; /* -- cgit v1.2.3 From d30f370d3a4998c13ed3e5c8ef607d05be0a987a Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Wed, 27 Jun 2018 11:30:53 -0500 Subject: x86/kvm/Kconfig: Ensure CRYPTO_DEV_CCP_DD state at minimum matches KVM_AMD Prevent a config where KVM_AMD=y and CRYPTO_DEV_CCP_DD=m thereby ensuring that AMD Secure Processor device driver will be built-in when KVM_AMD is also built-in. v1->v2: * Removed usage of 'imply' Kconfig option. * Change patch commit message. Fixes: 505c9e94d832 ("KVM: x86: prefer "depends on" to "select" for SEV") Cc: # 4.16.x Signed-off-by: Janakarajan Natarajan Reviewed-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 92fd433c50b9..1bbec387d289 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -85,7 +85,7 @@ config KVM_AMD_SEV def_bool y bool "AMD Secure Encrypted Virtualization (SEV) support" depends on KVM_AMD && X86_64 - depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP + depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) ---help--- Provides support for launching Encrypted VMs on AMD processors. -- cgit v1.2.3 From 94ffba484663ab3fc695ce2a34871e8c3db499f7 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Sun, 15 Jul 2018 17:43:11 +0200 Subject: x86/kvmclock: set pvti_cpu0_va after enabling kvmclock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pvti_cpu0_va is the address of shared kvmclock data structure. pvti_cpu0_va is currently kept unset (1) on 32 bit systems, (2) when kvmclock vsyscall is disabled, and (3) if kvmclock is not stable. This poses a problem, because kvm_ptp needs pvti_cpu0_va, but (1) can work on 32 bit, (2) has little relation to the vsyscall, and (3) does not need stable kvmclock (although kvmclock won't be used for system clock if it's not stable, so kvm_ptp is pointless in that case). Expose pvti_cpu0_va whenever kvmclock is enabled to allow all users to work with it. This fixes a regression found on Gentoo: https://bugs.gentoo.org/658544. Fixes: 9f08890ab906 ("x86/pvclock: add setter for pvclock_pvti_cpu0_va") Cc: stable@vger.kernel.org Reported-by: Andreas Steinmetz Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 8b26c9e01cc4..d79a18b4cf9d 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -319,6 +319,8 @@ void __init kvmclock_init(void) printk(KERN_INFO "kvm-clock: Using msrs %x and %x", msr_kvm_system_time, msr_kvm_wall_clock); + pvclock_set_pvti_cpu0_va(hv_clock); + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); @@ -366,14 +368,11 @@ int __init kvm_setup_vsyscall_timeinfo(void) vcpu_time = &hv_clock[cpu].pvti; flags = pvclock_read_flags(vcpu_time); - if (!(flags & PVCLOCK_TSC_STABLE_BIT)) { - put_cpu(); - return 1; - } - - pvclock_set_pvti_cpu0_va(hv_clock); put_cpu(); + if (!(flags & PVCLOCK_TSC_STABLE_BIT)) + return 1; + kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif return 0; -- cgit v1.2.3 From 092b31aa2048cf7561a39697974adcd147fbb27b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 8 Jul 2018 13:46:17 -0700 Subject: x86/asm/memcpy_mcsafe: Fix copy_to_user_mcsafe() exception handling All copy_to_user() implementations need to be prepared to handle faults accessing userspace. The __memcpy_mcsafe() implementation handles both mmu-faults on the user destination and machine-check-exceptions on the source buffer. However, the memcpy_mcsafe() wrapper may silently fallback to memcpy() depending on build options and cpu-capabilities. Force copy_to_user_mcsafe() to always use __memcpy_mcsafe() when available, and otherwise disable all of the copy_to_user_mcsafe() infrastructure when __memcpy_mcsafe() is not available, i.e. CONFIG_X86_MCE=n. This fixes crashes of the form: run fstests generic/323 at 2018-07-02 12:46:23 BUG: unable to handle kernel paging request at 00007f0d50001000 RIP: 0010:__memcpy+0x12/0x20 [..] Call Trace: copyout_mcsafe+0x3a/0x50 _copy_to_iter_mcsafe+0xa1/0x4a0 ? dax_alive+0x30/0x50 dax_iomap_actor+0x1f9/0x280 ? dax_iomap_rw+0x100/0x100 iomap_apply+0xba/0x130 ? dax_iomap_rw+0x100/0x100 dax_iomap_rw+0x95/0x100 ? dax_iomap_rw+0x100/0x100 xfs_file_dax_read+0x7b/0x1d0 [xfs] xfs_file_read_iter+0xa7/0xc0 [xfs] aio_read+0x11c/0x1a0 Reported-by: Ross Zwisler Tested-by: Ross Zwisler Signed-off-by: Dan Williams Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Fixes: 8780356ef630 ("x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe()") Link: http://lkml.kernel.org/r/153108277790.37979.1486841789275803399.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/uaccess_64.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f1dbb4ee19d7..887d3a7bb646 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -63,7 +63,7 @@ config X86 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 - select ARCH_HAS_UACCESS_MCSAFE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62acb613114b..a9d637bc301d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -52,7 +52,12 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len) unsigned long ret; __uaccess_begin(); - ret = memcpy_mcsafe(to, from, len); + /* + * Note, __memcpy_mcsafe() is explicitly used since it can + * handle exceptions / faults. memcpy_mcsafe() may fall back to + * memcpy() which lacks this handling. + */ + ret = __memcpy_mcsafe(to, from, len); __uaccess_end(); return ret; } -- cgit v1.2.3 From 38c0a74fe06da3be133cae3fb7bde6a9438e698b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 12 Jul 2018 09:33:04 -0700 Subject: MIPS: Fix off-by-one in pci_resource_to_user() The MIPS implementation of pci_resource_to_user() introduced in v3.12 by commit 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly") incorrectly sets *end to the address of the byte after the resource, rather than the last byte of the resource. This results in userland seeing resources as a byte larger than they actually are, for example a 32 byte BAR will be reported by a tool such as lspci as being 33 bytes in size: Region 2: I/O ports at 1000 [disabled] [size=33] Correct this by subtracting one from the calculated end address, reporting the correct address to userland. Signed-off-by: Paul Burton Reported-by: Rui Wang Fixes: 4c2924b725fb ("MIPS: PCI: Use pci_resource_to_user to map pci memory space properly") Cc: James Hogan Cc: Ralf Baechle Cc: Wolfgang Grandegger Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v3.12+ Patchwork: https://patchwork.linux-mips.org/patch/19829/ --- arch/mips/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index 9632436d74d7..c2e94cf5ecda 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -54,5 +54,5 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, phys_addr_t size = resource_size(rsrc); *start = fixup_bigphys_addr(rsrc->start, size); - *end = rsrc->start + size; + *end = rsrc->start + size - 1; } -- cgit v1.2.3 From 6f6060a5c9cc76fdbc22748264e6aa3779ec2427 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 9 Jul 2018 16:35:34 +0300 Subject: x86/apm: Don't access __preempt_count with zeroed fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit APM_DO_POP_SEGS does not restore fs/gs which were zeroed by APM_DO_ZERO_SEGS. Trying to access __preempt_count with zeroed fs doesn't really work. Move the ibrs call outside the APM_DO_SAVE_SEGS/APM_DO_RESTORE_SEGS invocations so that fs is actually restored before calling preempt_enable(). Fixes the following sort of oopses: [ 0.313581] general protection fault: 0000 [#1] PREEMPT SMP [ 0.313803] Modules linked in: [ 0.314040] CPU: 0 PID: 268 Comm: kapmd Not tainted 4.16.0-rc1-triton-bisect-00090-gdd84441a7971 #19 [ 0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 [ 0.316161] EFLAGS: 00210016 CPU: 0 [ 0.316161] EAX: 00000102 EBX: 00000000 ECX: 00000102 EDX: 00000000 [ 0.316161] ESI: 0000530e EDI: dea95f64 EBP: dea95f18 ESP: dea95ef0 [ 0.316161] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 [ 0.316161] CR0: 80050033 CR2: 00000000 CR3: 015d3000 CR4: 000006d0 [ 0.316161] Call Trace: [ 0.316161] ? cpumask_weight.constprop.15+0x20/0x20 [ 0.316161] on_cpu0+0x44/0x70 [ 0.316161] apm+0x54e/0x720 [ 0.316161] ? __switch_to_asm+0x26/0x40 [ 0.316161] ? __schedule+0x17d/0x590 [ 0.316161] kthread+0xc0/0xf0 [ 0.316161] ? proc_apm_show+0x150/0x150 [ 0.316161] ? kthread_create_worker_on_cpu+0x20/0x20 [ 0.316161] ret_from_fork+0x2e/0x38 [ 0.316161] Code: da 8e c2 8e e2 8e ea 57 55 2e ff 1d e0 bb 5d b1 0f 92 c3 5d 5f 07 1f 89 47 0c 90 8d b4 26 00 00 00 00 90 8d b4 26 00 00 00 00 90 <64> ff 0d 84 16 5c b1 74 7f 8b 45 dc 8e e0 8b 45 d8 8e e8 8b 45 [ 0.316161] EIP: __apm_bios_call_simple+0xc8/0x170 SS:ESP: 0068:dea95ef0 [ 0.316161] ---[ end trace 656253db2deaa12c ]--- Fixes: dd84441a7971 ("x86/speculation: Use IBRS if available before calling into firmware") Signed-off-by: Ville Syrjälä Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Cc: David Woodhouse Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: David Woodhouse Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709133534.5963-1-ville.syrjala@linux.intel.com --- arch/x86/include/asm/apm.h | 6 ------ arch/x86/kernel/apm_32.c | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index c356098b6fb9..4d4015ddcf26 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,8 +7,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H -#include - #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -34,7 +32,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -47,7 +44,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -60,7 +56,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ - firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -73,7 +68,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); - firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5d0de79fdab0..ec00d1ff5098 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -240,6 +240,7 @@ #include #include #include +#include #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -614,11 +615,13 @@ static long __apm_bios_call(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); @@ -690,10 +693,12 @@ static long __apm_bios_call_simple(void *_call) gdt[0x40 / 8] = bad_bios_desc; apm_irq_save(flags); + firmware_restrict_branch_speculation_start(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); -- cgit v1.2.3 From d3f6daede246038cf2ea38b78d732f9dd8feb1d6 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 16 Jul 2018 09:10:48 +0200 Subject: ARM: dts: omap4-droid4: fix dts w.r.t. pwm pwm node should not be under gpio6 node in the device tree. This fixes detection of the pwm on Droid 4. Fixes: 6d7bdd328da4 ("ARM: dts: omap4-droid4: update touchscreen") Signed-off-by: Pavel Machek Reviewed-by: Sebastian Reichel [tony@atomide.com: added fixes tag] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-droid4-xt894.dts | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index bdf73cbcec3a..e7c3c563ff8f 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -159,13 +159,7 @@ dais = <&mcbsp2_port>, <&mcbsp3_port>; }; -}; - -&dss { - status = "okay"; -}; -&gpio6 { pwm8: dmtimer-pwm-8 { pinctrl-names = "default"; pinctrl-0 = <&vibrator_direction_pin>; @@ -192,7 +186,10 @@ pwm-names = "enable", "direction"; direction-duty-cycle-ns = <10000000>; }; +}; +&dss { + status = "okay"; }; &dsi1 { -- cgit v1.2.3 From 941d810725ad48cc21948f4cff8cf70fa2a67cf9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Jul 2018 23:52:14 +1000 Subject: powerpc/xmon: Fix disassembly since printf changes The recent change to add printf annotations to xmon inadvertently made the disassembly output ugly, eg: c00000002001e058 7ee00026 mfcr r23 c00000002001e05c fffffffffae101a0 std r23,416(r1) c00000002001e060 fffffffff8230000 std r1,0(r3) The problem being that negative 32-bit values are being displayed in full 64-bits. The printf conversion was actually correct, we are passing unsigned long so it should use "lx". But powerpc instructions are only 4 bytes and the code only reads 4 bytes, so inst should really just be unsigned int, and that also fixes the printing to look the way we want: c00000002001e058 7ee00026 mfcr r23 c00000002001e05c fae101a0 std r23,416(r1) c00000002001e060 f8230000 std r1,0(r3) Fixes: e70d8f55268b ("powerpc/xmon: Add __printf annotation to xmon_printf()") Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 47166ad2a669..196978733e64 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2734,7 +2734,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - unsigned long inst, last_inst = 0; + unsigned int inst, last_inst = 0; unsigned char val[4]; dotted = 0; @@ -2758,7 +2758,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, dotted = 0; last_inst = inst; if (praddr) - printf(REG" %.8lx", adr, inst); + printf(REG" %.8x", adr, inst); printf("\t"); dump_func(inst, adr); printf("\n"); -- cgit v1.2.3 From fbdb328c6bae0a7c78d75734a738b66b86dffc96 Mon Sep 17 00:00:00 2001 From: Dewet Thibaut Date: Mon, 16 Jul 2018 10:49:27 +0200 Subject: x86/MCE: Remove min interval polling limitation commit b3b7c4795c ("x86/MCE: Serialize sysfs changes") introduced a min interval limitation when setting the check interval for polled MCEs. However, the logic is that 0 disables polling for corrected MCEs, see Documentation/x86/x86_64/machinecheck. The limitation prevents disabling. Remove this limitation and allow the value 0 to disable polling again. Fixes: b3b7c4795c ("x86/MCE: Serialize sysfs changes") Signed-off-by: Dewet Thibaut Signed-off-by: Alexander Sverdlin [ Massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180716084927.24869-1-alexander.sverdlin@nokia.com --- arch/x86/kernel/cpu/mcheck/mce.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c102ad51025e..8c50754c09c1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2165,9 +2165,6 @@ static ssize_t store_int_with_restart(struct device *s, if (check_interval == old_check_interval) return ret; - if (check_interval < 1) - check_interval = 1; - mutex_lock(&mce_sysfs_mutex); mce_restart(); mutex_unlock(&mce_sysfs_mutex); -- cgit v1.2.3 From 76fa4975f3ed12d15762bc979ca44078598ed8ee Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 17 Jul 2018 17:19:13 +1000 Subject: KVM: PPC: Check if IOMMU page is contained in the pinned physical page A VM which has: - a DMA capable device passed through to it (eg. network card); - running a malicious kernel that ignores H_PUT_TCE failure; - capability of using IOMMU pages bigger that physical pages can create an IOMMU mapping that exposes (for example) 16MB of the host physical memory to the device when only 64K was allocated to the VM. The remaining 16MB - 64K will be some other content of host memory, possibly including pages of the VM, but also pages of host kernel memory, host programs or other VMs. The attacking VM does not control the location of the page it can map, and is only allowed to map as many pages as it has pages of RAM. We already have a check in drivers/vfio/vfio_iommu_spapr_tce.c that an IOMMU page is contained in the physical page so the PCI hardware won't get access to unassigned host memory; however this check is missing in the KVM fastpath (H_PUT_TCE accelerated code). We were lucky so far and did not hit this yet as the very first time when the mapping happens we do not have tbl::it_userspace allocated yet and fall back to the userspace which in turn calls VFIO IOMMU driver, this fails and the guest does not retry, This stores the smallest preregistered page size in the preregistered region descriptor and changes the mm_iommu_xxx API to check this against the IOMMU page size. This calculates maximum page size as a minimum of the natural region alignment and compound page size. For the page shift this uses the shift returned by find_linux_pte() which indicates how the page is mapped to the current userspace - if the page is huge and this is not a zero, then it is a leaf pte and the page is mapped within the range. Fixes: 121f80ba68f1 ("KVM: PPC: VFIO: Add in-kernel acceleration for VFIO") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 4 ++-- arch/powerpc/kvm/book3s_64_vio.c | 2 +- arch/powerpc/kvm/book3s_64_vio_hv.c | 6 ++++-- arch/powerpc/mm/mmu_context_iommu.c | 37 ++++++++++++++++++++++++++++++++-- drivers/vfio/vfio_iommu_spapr_tce.c | 2 +- 5 files changed, 43 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 896efa559996..79d570cbf332 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -35,9 +35,9 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm( extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa); + unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d066e37551ec..8c456fa691a5 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -449,7 +449,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, /* This only handles v2 IOMMU type, v1 is handled via ioctl() */ return H_TOO_HARD; - if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) + if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) return H_HARDWARE; if (mm_iommu_mapped_inc(mem)) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 925fc316a104..5b298f5a1a14 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -279,7 +279,8 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (!mem) return H_TOO_HARD; - if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) + if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, + &hpa))) return H_HARDWARE; pua = (void *) vmalloc_to_phys(pua); @@ -469,7 +470,8 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); if (mem) - prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0; + prereg = mm_iommu_ua_to_hpa_rm(mem, ua, + IOMMU_PAGE_SHIFT_4K, &tces) == 0; } if (!prereg) { diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index abb43646927a..a4ca57612558 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -19,6 +19,7 @@ #include #include #include +#include static DEFINE_MUTEX(mem_list_mutex); @@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t { struct rcu_head rcu; unsigned long used; atomic64_t mapped; + unsigned int pageshift; u64 ua; /* userspace address */ u64 entries; /* number of entries in hpas[] */ u64 *hpas; /* vmalloc'ed */ @@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; + unsigned int pageshift; + unsigned long flags; struct page *page = NULL; mutex_lock(&mem_list_mutex); @@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, goto unlock_exit; } + /* + * For a starting point for a maximum page size calculation + * we use @ua and @entries natural alignment to allow IOMMU pages + * smaller than huge pages but still bigger than PAGE_SIZE. + */ + mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT)); mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0]))); if (!mem->hpas) { kfree(mem); @@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, } } populate: + pageshift = PAGE_SHIFT; + if (PageCompound(page)) { + pte_t *pte; + struct page *head = compound_head(page); + unsigned int compshift = compound_order(head); + + local_irq_save(flags); /* disables as well */ + pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift); + local_irq_restore(flags); + + /* Double check it is still the same pinned page */ + if (pte && pte_page(*pte) == head && + pageshift == compshift) + pageshift = max_t(unsigned int, pageshift, + PAGE_SHIFT); + } + mem->pageshift = min(mem->pageshift, pageshift); mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } @@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, EXPORT_SYMBOL_GPL(mm_iommu_find); long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; u64 *va = &mem->hpas[entry]; @@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + *hpa = *va | (ua & ~PAGE_MASK); return 0; @@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, - unsigned long ua, unsigned long *hpa) + unsigned long ua, unsigned int pageshift, unsigned long *hpa) { const long entry = (ua - mem->ua) >> PAGE_SHIFT; void *va = &mem->hpas[entry]; @@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (entry >= mem->entries) return -EFAULT; + if (pageshift > mem->pageshift) + return -EFAULT; + pa = (void *) vmalloc_to_phys(va); if (!pa) return -EFAULT; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 2da5f054257a..7cd63b0c1a46 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -467,7 +467,7 @@ static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container, if (!mem) return -EINVAL; - ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa); if (ret) return -EINVAL; -- cgit v1.2.3 From 2307af1c4b2e0ad886f30e31739845322cbd328b Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Fri, 29 Jun 2018 22:59:04 +0300 Subject: KVM: VMX: Mark VMXArea with revision_id of physical CPU even when eVMCS enabled When eVMCS is enabled, all VMCS allocated to be used by KVM are marked with revision_id of KVM_EVMCS_VERSION instead of revision_id reported by MSR_IA32_VMX_BASIC. However, even though not explictly documented by TLFS, VMXArea passed as VMXON argument should still be marked with revision_id reported by physical CPU. This issue was found by the following setup: * L0 = KVM which expose eVMCS to it's L1 guest. * L1 = KVM which consume eVMCS reported by L0. This setup caused the following to occur: 1) L1 execute hardware_enable(). 2) hardware_enable() calls kvm_cpu_vmxon() to execute VMXON. 3) L0 intercept L1 VMXON and execute handle_vmon() which notes vmxarea->revision_id != VMCS12_REVISION and therefore fails with nested_vmx_failInvalid() which sets RFLAGS.CF. 4) L1 kvm_cpu_vmxon() don't check RFLAGS.CF for failure and therefore hardware_enable() continues as usual. 5) L1 hardware_enable() then calls ept_sync_global() which executes INVEPT. 6) L0 intercept INVEPT and execute handle_invept() which notes !vmx->nested.vmxon and thus raise a #UD to L1. 7) Raised #UD caused L1 to panic. Reviewed-by: Krish Sadhukhan Cc: stable@vger.kernel.org Fixes: 773e8a0425c923bc02668a2d6534a5ef5a43cc69 Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ba981459d706..c3c85908b8de 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4108,11 +4108,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmcs_conf->order = get_order(vmcs_conf->size); vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - /* KVM supports Enlightened VMCS v1 only */ - if (static_branch_unlikely(&enable_evmcs)) - vmcs_conf->revision_id = KVM_EVMCS_VERSION; - else - vmcs_conf->revision_id = vmx_msr_low; + vmcs_conf->revision_id = vmx_msr_low; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; @@ -4182,7 +4178,13 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) return NULL; vmcs = page_address(pages); memset(vmcs, 0, vmcs_config.size); - vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */ + + /* KVM supports Enlightened VMCS v1 only */ + if (static_branch_unlikely(&enable_evmcs)) + vmcs->revision_id = KVM_EVMCS_VERSION; + else + vmcs->revision_id = vmcs_config.revision_id; + return vmcs; } @@ -4341,6 +4343,19 @@ static __init int alloc_kvm_area(void) return -ENOMEM; } + /* + * When eVMCS is enabled, alloc_vmcs_cpu() sets + * vmcs->revision_id to KVM_EVMCS_VERSION instead of + * revision_id reported by MSR_IA32_VMX_BASIC. + * + * However, even though not explictly documented by + * TLFS, VMXArea passed as VMXON argument should + * still be marked with revision_id reported by + * physical CPU. + */ + if (static_branch_unlikely(&enable_evmcs)) + vmcs->revision_id = vmcs_config.revision_id; + per_cpu(vmxarea, cpu) = vmcs; } return 0; -- cgit v1.2.3 From e10f7805032365cc11c739a97f226ebb48aee042 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sat, 14 Jul 2018 23:28:29 +0800 Subject: kvmclock: fix TSC calibration for nested guests Inside a nested guest, access to hardware can be slow enough that tsc_read_refs always return ULLONG_MAX, causing tsc_refine_calibration_work to be called periodically and the nested guest to spend a lot of time reading the ACPI timer. However, if the TSC frequency is available from the pvclock page, we can just set X86_FEATURE_TSC_KNOWN_FREQ and avoid the recalibration. 'refine' operation. Suggested-by: Peter Zijlstra Signed-off-by: Peng Hao [Commit message rewritten. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d79a18b4cf9d..4c53d12ca933 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -138,6 +138,7 @@ static unsigned long kvm_get_tsc_khz(void) src = &hv_clock[cpu].pvti; tsc_khz = pvclock_tsc_khz(src); put_cpu(); + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); return tsc_khz; } -- cgit v1.2.3 From 4e4a4b75ccce827f9b8b04b6ee5c90169df4b6ee Mon Sep 17 00:00:00 2001 From: James Clarke Date: Thu, 12 Jul 2018 22:41:49 +0100 Subject: powerpc/Makefile: Assemble with -me500 when building for E500 Some of the assembly files use instructions specific to BookE or E500, which are rejected with the now-default -mcpu=powerpc, so we must pass -me500 to the assembler just as we pass -me200 for E200. Fixes: 4bf4f42a2feb ("powerpc/kbuild: Set default generic machine type for 32-bit compile") Signed-off-by: James Clarke Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 2ea575cb3401..fb96206de317 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -243,6 +243,7 @@ endif cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 +cpu-as-$(CONFIG_E500) += -Wa,-me500 cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) -- cgit v1.2.3 From b03897cf318dfc47de33a7ecbc7655584266f034 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 18 Jul 2018 14:03:16 +0530 Subject: powerpc/powernv: Fix save/restore of SPRG3 on entry/exit from stop (idle) On 64-bit servers, SPRN_SPRG3 and its userspace read-only mirror SPRN_USPRG3 are used as userspace VDSO write and read registers respectively. SPRN_SPRG3 is lost when we enter stop4 and above, and is currently not restored. As a result, any read from SPRN_USPRG3 returns zero on an exit from stop4 (Power9 only) and above. Thus in this situation, on POWER9, any call from sched_getcpu() always returns zero, as on powerpc, we call __kernel_getcpu() which relies upon SPRN_USPRG3 to report the CPU and NUMA node information. Fix this by restoring SPRN_SPRG3 on wake up from a deep stop state with the sprg_vdso value that is cached in PACA. Fixes: e1c1cfed5432 ("powerpc/powernv: Save/Restore additional SPRs for stop4 cpuidle") Cc: stable@vger.kernel.org # v4.14+ Reported-by: Florian Weimer Signed-off-by: Gautham R. Shenoy Reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index e734f6e45abc..689306118b48 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -144,7 +144,9 @@ power9_restore_additional_sprs: mtspr SPRN_MMCR1, r4 ld r3, STOP_MMCR2(r13) + ld r4, PACA_SPRG_VDSO(r13) mtspr SPRN_MMCR2, r3 + mtspr SPRN_SPRG3, r4 blr /* -- cgit v1.2.3 From af1fc5baa724c63ce1733dfcf855bad5ef6078e3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 17 Jul 2018 15:21:56 -0700 Subject: ARCv2: [plat-hsdk]: Save accl reg pair by default This manifsted as strace segfaulting on HSDK because gcc was targetting the accumulator registers as GPRs, which kernek was not saving/restoring by default. Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 +- arch/arc/plat-hsdk/Kconfig | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d76bf4a83740..bc0bcf01ec98 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -408,7 +408,7 @@ config ARC_HAS_DIV_REM config ARC_HAS_ACCL_REGS bool "Reg Pair ACCL:ACCH (FPU and/or MPY > 6)" - default n + default y help Depending on the configuration, CPU can contain accumulator reg-pair (also referred to as r58:r59). These can also be used by gcc as GPR so diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 556bc5ef1257..9356753c2ed8 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -7,6 +7,8 @@ menuconfig ARC_SOC_HSDK bool "ARC HS Development Kit SOC" + depends on ISA_ARCV2 + select ARC_HAS_ACCL_REGS select CLK_HSDK select RESET_HSDK select MIGHT_HAVE_PCI -- cgit v1.2.3 From b9c1e60e7bf4e64ac1b4f4d6d593f0bb57886973 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 19 Jul 2018 18:18:35 +0200 Subject: bpf, ppc64: fix unexpected r0=0 exit path inside bpf_xadd None of the JITs is allowed to implement exit paths from the BPF insn mappings other than BPF_JMP | BPF_EXIT. In the BPF core code we have a couple of rewrites in eBPF (e.g. LD_ABS / LD_IND) and in eBPF to cBPF translation to retain old existing behavior where exceptions may occur; they are also tightly controlled by the verifier where it disallows some of the features such as BPF to BPF calls when legacy LD_ABS / LD_IND ops are present in the BPF program. During recent review of all BPF_XADD JIT implementations I noticed that the ppc64 one is buggy in that it contains two jumps to exit paths. This is problematic as this can bypass verifier expectations e.g. pointed out in commit f6b1b3bf0d5f ("bpf: fix subprog verifier bypass by div/mod by 0 exception"). The first exit path is obsoleted by the fix in ca36960211eb ("bpf: allow xadd only on aligned memory") anyway, and for the second one we need to do a fetch, add and store loop if the reservation from lwarx/ldarx was lost in the meantime. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Reviewed-by: Naveen N. Rao Reviewed-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 380cbf9a40d9..c0a9bcd28356 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -286,6 +286,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u64 imm64; u8 *func; u32 true_cond; + u32 tmp_idx; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -637,11 +638,7 @@ emit_clear: case BPF_STX | BPF_XADD | BPF_W: /* Get EA into TMP_REG_1 */ PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); - /* error if EA is not word-aligned */ - PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); + tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); /* add value from src_reg into this */ @@ -649,32 +646,16 @@ emit_clear: /* store result back */ PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); - /* otherwise, let's try once more */ - PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - /* exit if the store was not successful */ - PPC_LI(b2p[BPF_REG_0], 0); - PPC_BCC(COND_NE, exit_addr); + PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); - /* error if EA is not doubleword-aligned */ - PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4)); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_JMP(exit_addr); - PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); - PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); - PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); + tmp_idx = ctx->idx * 4; PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); - PPC_LI(b2p[BPF_REG_0], 0); - PPC_BCC(COND_NE, exit_addr); + PPC_BCC_SHORT(COND_NE, tmp_idx); break; /* -- cgit v1.2.3 From e01a06c8089132bb4da035c6a83df23916ca3ebf Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 19 Jul 2018 21:38:23 +0200 Subject: ARM: dts: imx6: RDU2: fix irq type for mv88e6xxx switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Marvell switches report their interrupts in a level sensitive way. When using edge sensitive detection a race condition in the interrupt handler of the swich might result in the OS to miss all future events which might make the switch non-functional. The problem is that both mv88e6xxx_g2_irq_thread_fn() and mv88e6xxx_g1_irq_thread_work() sample the irq cause register (MV88E6XXX_G2_INT_SRC and MV88E6XXX_G1_STS respectively) once and then handle the observed sources. If after sampling but before all observed irq sources are handled a new irq source gets active this is not noticed by the handler which returns unsuspecting, but the interrupt line stays active which prevents the edge detector to kick in. All device trees but imx6qdl-zii-rdu2 get this right (most of them by not specifying an interrupt parent). So fix imx6qdl-zii-rdu2 accordingly. Signed-off-by: Uwe Kleine-König Fixes: f64992d1a916 ("ARM: dts: imx6: RDU2: Add Switch interrupts") Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 19a075aee19e..f14df0baf2ab 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -692,7 +692,7 @@ dsa,member = <0 0>; eeprom-length = <512>; interrupt-parent = <&gpio6>; - interrupts = <3 IRQ_TYPE_EDGE_FALLING>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; #interrupt-cells = <2>; -- cgit v1.2.3 From 0a06d4256674c4e041945b52044941995fee237d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 19 Jul 2018 10:31:00 -0700 Subject: KVM: vmx: use local variable for current_vmptr when emulating VMPTRST Do not expose the address of vmx->nested.current_vmptr to kvm_write_guest_virt_system() as the resulting __copy_to_user() call will trigger a WARN when CONFIG_HARDENED_USERCOPY is enabled. Opportunistically clean up variable names in handle_vmptrst() to improve readability, e.g. vmcs_gva is misleading as the memory operand of VMPTRST is plain memory, not a VMCS. Signed-off-by: Sean Christopherson Tested-by: Peter Shier Reviewed-by: Peter Shier Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e30da9a2430c..548bef5359e6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8480,21 +8480,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) /* Emulate the VMPTRST instruction */ static int handle_vmptrst(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - gva_t vmcs_gva; + unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); + u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); + gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; + gva_t gva; if (!nested_vmx_check_permission(vcpu)) return 1; - if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, &vmcs_gva)) + if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, vmcs_gva, - (void *)&to_vmx(vcpu)->nested.current_vmptr, - sizeof(u64), &e)) { + if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, + sizeof(gpa_t), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } -- cgit v1.2.3 From 63aff65573d73eb8dda4732ad4ef222dd35e4862 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 19 Jul 2018 21:59:07 +0300 Subject: kvm: x86: vmx: fix vpid leak VPID for the nested vcpu is allocated at vmx_create_vcpu whenever nested vmx is turned on with the module parameter. However, it's only freed if the L1 guest has executed VMXON which is not a given. As a result, on a system with nested==on every creation+deletion of an L1 vcpu without running an L2 guest results in leaking one vpid. Since the total number of vpids is limited to 64k, they can eventually get exhausted, preventing L2 from starting. Delay allocation of the L2 vpid until VMXON emulation, thus matching its freeing. Fixes: 5c614b3583e7b6dab0c86356fa36c2bcbb8322a0 Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 548bef5359e6..5d8e317c2b04 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7893,6 +7893,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) HRTIMER_MODE_REL_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; + vmx->nested.vpid02 = allocate_vpid(); + vmx->nested.vmxon = true; return 0; @@ -10369,11 +10371,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (nested) { + if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, kvm_vcpu_apicv_active(&vmx->vcpu)); - vmx->nested.vpid02 = allocate_vpid(); - } vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -10390,7 +10390,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return &vmx->vcpu; free_vmcs: - free_vpid(vmx->nested.vpid02); free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); -- cgit v1.2.3 From bc88ad2efd11f29e00a4fd60fcd1887abfe76833 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 20 Jul 2018 13:58:21 +0200 Subject: MIPS: ath79: fix register address in ath79_ddr_wb_flush() ath79_ddr_wb_flush_base has the type void __iomem *, so register offsets need to be a multiple of 4 in order to access the intended register. Signed-off-by: Felix Fietkau Signed-off-by: John Crispin Signed-off-by: Paul Burton Fixes: 24b0e3e84fbf ("MIPS: ath79: Improve the DDR controller interface") Patchwork: https://patchwork.linux-mips.org/patch/19912/ Cc: Alban Bedel Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.2+ --- arch/mips/ath79/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 10a405d593df..c782b10ddf50 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -58,7 +58,7 @@ EXPORT_SYMBOL_GPL(ath79_ddr_ctrl_init); void ath79_ddr_wb_flush(u32 reg) { - void __iomem *flush_reg = ath79_ddr_wb_flush_base + reg; + void __iomem *flush_reg = ath79_ddr_wb_flush_base + (reg * 4); /* Flush the DDR write buffer. */ __raw_writel(0x1, flush_reg); -- cgit v1.2.3 From 3928d4f5ee37cdc523894f6e549e6aae521d8980 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 21 Jul 2018 13:48:51 -0700 Subject: mm: use helper functions for allocating and freeing vm_area structs The vm_area_struct is one of the most fundamental memory management objects, but the management of it is entirely open-coded evertwhere, ranging from allocation and freeing (using kmem_cache_[z]alloc and kmem_cache_free) to initializing all the fields. We want to unify this in order to end up having some unified initialization of the vmas, and the first step to this is to at least have basic allocation functions. Right now those functions are literally just wrappers around the kmem_cache_*() calls. This is a purely mechanical conversion: # new vma: kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL) -> vm_area_alloc() # copy old vma kmem_cache_alloc(vm_area_cachep, GFP_KERNEL) -> vm_area_dup(old) # free vma kmem_cache_free(vm_area_cachep, vma) -> vm_area_free(vma) to the point where the old vma passed in to the vm_area_dup() function isn't even used yet (because I've left all the old manual initialization alone). Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 4 ++-- arch/ia64/mm/init.c | 8 ++++---- fs/exec.c | 4 ++-- include/linux/mm.h | 4 +++- kernel/fork.c | 21 ++++++++++++++++++--- mm/mmap.c | 22 +++++++++++----------- mm/nommu.c | 8 ++++---- 7 files changed, 44 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3b38c717008a..e859246badca 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2278,7 +2278,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; @@ -2346,7 +2346,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t return 0; error: - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); error_kmem: pfm_rvfree(smpl_buf, size); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 18278b448530..3f2321bffb72 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -114,7 +114,7 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (vma) { INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = current->mm; @@ -125,7 +125,7 @@ ia64_init_addr_space (void) down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(¤t->mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return; } up_write(¤t->mm->mmap_sem); @@ -133,7 +133,7 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (vma) { INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = current->mm; @@ -144,7 +144,7 @@ ia64_init_addr_space (void) down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { up_write(¤t->mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return; } up_write(¤t->mm->mmap_sem); diff --git a/fs/exec.c b/fs/exec.c index 2d4e0075bd24..9bd83989ea25 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + bprm->vma = vma = vm_area_alloc(); if (!vma) return -ENOMEM; @@ -326,7 +326,7 @@ err: up_write(&mm->mmap_sem); err_free: bprm->vma = NULL; - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return err; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 3982c83fdcbf..de2fd86c6154 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,7 +155,9 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, * mmap() functions). */ -extern struct kmem_cache *vm_area_cachep; +struct vm_area_struct *vm_area_alloc(void); +struct vm_area_struct *vm_area_dup(struct vm_area_struct *); +void vm_area_free(struct vm_area_struct *); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; diff --git a/kernel/fork.c b/kernel/fork.c index 9440d61b925c..0e23deb5acfc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -303,11 +303,26 @@ struct kmem_cache *files_cachep; struct kmem_cache *fs_cachep; /* SLAB cache for vm_area_struct structures */ -struct kmem_cache *vm_area_cachep; +static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; +struct vm_area_struct *vm_area_alloc(void) +{ + return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); +} + +struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) +{ + return kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); +} + +void vm_area_free(struct vm_area_struct *vma) +{ + kmem_cache_free(vm_area_cachep, vma); +} + static void account_kernel_stack(struct task_struct *tsk, int account) { void *stack = task_stack_page(tsk); @@ -455,7 +470,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, goto fail_nomem; charge = len; } - tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + tmp = vm_area_dup(mpnt); if (!tmp) goto fail_nomem; *tmp = *mpnt; @@ -539,7 +554,7 @@ fail_uprobe_end: fail_nomem_anon_vma_fork: mpol_put(vma_policy(tmp)); fail_nomem_policy: - kmem_cache_free(vm_area_cachep, tmp); + vm_area_free(tmp); fail_nomem: retval = -ENOMEM; vm_unacct_memory(charge); diff --git a/mm/mmap.c b/mm/mmap.c index 5801b5f0a634..4286ad2dd1f5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -182,7 +182,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return next; } @@ -911,7 +911,7 @@ again: anon_vma_merge(vma, next); mm->map_count--; mpol_put(vma_policy(next)); - kmem_cache_free(vm_area_cachep, next); + vm_area_free(next); /* * In mprotect's case 6 (see comments on vma_merge), * we must remove another next too. It would clutter @@ -1729,7 +1729,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (!vma) { error = -ENOMEM; goto unacct_error; @@ -1832,7 +1832,7 @@ allow_write_and_free_vma: if (vm_flags & VM_DENYWRITE) allow_write_access(file); free_vma: - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); unacct_error: if (charged) vm_unacct_memory(charged); @@ -2620,7 +2620,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, return err; } - new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + new = vm_area_dup(vma); if (!new) return -ENOMEM; @@ -2669,7 +2669,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, out_free_mpol: mpol_put(vma_policy(new)); out_free_vma: - kmem_cache_free(vm_area_cachep, new); + vm_area_free(new); return err; } @@ -2984,7 +2984,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla /* * create a vma struct for an anonymous mapping */ - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; @@ -3202,7 +3202,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, } *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff); } else { - new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + new_vma = vm_area_dup(vma); if (!new_vma) goto out; *new_vma = *vma; @@ -3226,7 +3226,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, out_free_mempol: mpol_put(vma_policy(new_vma)); out_free_vma: - kmem_cache_free(vm_area_cachep, new_vma); + vm_area_free(new_vma); out: return NULL; } @@ -3350,7 +3350,7 @@ static struct vm_area_struct *__install_special_mapping( int ret; struct vm_area_struct *vma; - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); @@ -3376,7 +3376,7 @@ static struct vm_area_struct *__install_special_mapping( return vma; out: - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return ERR_PTR(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index 4452d8bd9ae4..006e3fe65017 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -769,7 +769,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); put_nommu_region(vma->vm_region); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); } /* @@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file, if (!region) goto error_getting_region; - vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + vma = vm_area_alloc(); if (!vma) goto error_getting_vma; @@ -1368,7 +1368,7 @@ error: kmem_cache_free(vm_region_jar, region); if (vma->vm_file) fput(vma->vm_file); - kmem_cache_free(vm_area_cachep, vma); + vm_area_free(vma); return ret; sharing_violation: @@ -1469,7 +1469,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (!region) return -ENOMEM; - new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); + new = vm_area_dup(vma); if (!new) { kmem_cache_free(vm_region_jar, region); return -ENOMEM; -- cgit v1.2.3 From 490fc053865c9cc40f1085ef8a5504f5341f79d2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 21 Jul 2018 15:24:03 -0700 Subject: mm: make vm_area_alloc() initialize core fields Like vm_area_dup(), it initializes the anon_vma_chain head, and the basic mm pointer. The rest of the fields end up being different for different users, although the plan is to also initialize the 'vm_ops' field to a dummy entry. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 4 +--- arch/ia64/mm/init.c | 8 ++------ fs/exec.c | 4 +--- include/linux/mm.h | 2 +- kernel/fork.c | 10 ++++++++-- mm/mmap.c | 12 +++--------- mm/nommu.c | 3 +-- 7 files changed, 17 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index e859246badca..46bff1661836 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2278,17 +2278,15 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; } - INIT_LIST_HEAD(&vma->anon_vma_chain); /* * partially initialize the vma for the sampling buffer */ - vma->vm_mm = mm; vma->vm_file = get_file(filp); vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 3f2321bffb72..bdb14a369137 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -114,10 +114,8 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. */ - vma = vm_area_alloc(); + vma = vm_area_alloc(current->mm); if (vma) { - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = current->mm; vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; @@ -133,10 +131,8 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = vm_area_alloc(); + vma = vm_area_alloc(current->mm); if (vma) { - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = current->mm; vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | diff --git a/fs/exec.c b/fs/exec.c index 9bd83989ea25..72e961a62adb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) struct vm_area_struct *vma = NULL; struct mm_struct *mm = bprm->mm; - bprm->vma = vma = vm_area_alloc(); + bprm->vma = vma = vm_area_alloc(mm); if (!vma) return -ENOMEM; @@ -298,7 +298,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) err = -EINTR; goto err_free; } - vma->vm_mm = mm; /* * Place the stack at the largest stack address the architecture @@ -311,7 +310,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - INIT_LIST_HEAD(&vma->anon_vma_chain); err = insert_vm_struct(mm, vma); if (err) diff --git a/include/linux/mm.h b/include/linux/mm.h index de2fd86c6154..d3a3842316b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,7 +155,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, * mmap() functions). */ -struct vm_area_struct *vm_area_alloc(void); +struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); diff --git a/kernel/fork.c b/kernel/fork.c index 67253e41bfb0..a191c05e757d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -308,9 +308,15 @@ static struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -struct vm_area_struct *vm_area_alloc(void) +struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) { - return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + if (vma) { + vma->vm_mm = mm; + INIT_LIST_HEAD(&vma->anon_vma_chain); + } + return vma; } struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) diff --git a/mm/mmap.c b/mm/mmap.c index b0ed8ce1b67e..ff1944d8d458 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr, * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_chain); if (file) { if (vm_flags & VM_DENYWRITE) { @@ -2979,14 +2977,12 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla /* * create a vma struct for an anonymous mapping */ - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (!vma) { vm_unacct_memory(len >> PAGE_SHIFT); return -ENOMEM; } - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_pgoff = pgoff; @@ -3343,12 +3339,10 @@ static struct vm_area_struct *__install_special_mapping( int ret; struct vm_area_struct *vma; - vma = vm_area_alloc(); + vma = vm_area_alloc(mm); if (unlikely(vma == NULL)) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->anon_vma_chain); - vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; diff --git a/mm/nommu.c b/mm/nommu.c index c2560e9cc803..1d22fdbf7d7c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file, if (!region) goto error_getting_region; - vma = vm_area_alloc(); + vma = vm_area_alloc(current->mm); if (!vma) goto error_getting_vma; @@ -1212,7 +1212,6 @@ unsigned long do_mmap(struct file *file, region->vm_flags = vm_flags; region->vm_pgoff = pgoff; - INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_flags = vm_flags; vma->vm_pgoff = pgoff; -- cgit v1.2.3 From 9b788f32bee6b0b293a4bdfca4ad4bb0206407fb Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 20 Jul 2018 10:28:46 +0900 Subject: x86/efi: Access EFI MMIO data as unencrypted when SEV is active SEV guest fails to update the UEFI runtime variables stored in the flash. The following commit: 1379edd59673 ("x86/efi: Access EFI data as encrypted when SEV is active") unconditionally maps all the UEFI runtime data as 'encrypted' (C=1). When SEV is active the UEFI runtime data marked as EFI_MEMORY_MAPPED_IO should be mapped as 'unencrypted' so that both guest and hypervisor can access the data. Signed-off-by: Brijesh Singh Signed-off-by: Ard Biesheuvel Reviewed-by: Tom Lendacky Cc: # 4.15.x Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 1379edd59673 ("x86/efi: Access EFI data as encrypted ...") Link: http://lkml.kernel.org/r/20180720012846.23560-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 77873ce700ae..5f2eb3231607 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -417,7 +417,7 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; - if (sev_active()) + if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO) flags |= _PAGE_ENC; pfn = md->phys_addr >> PAGE_SHIFT; -- cgit v1.2.3 From f88a333b44318643282b8acc92af90deda441f5e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Jul 2018 15:07:11 +0100 Subject: alpha: fix osf_wait4() breakage kernel_wait4() expects a userland address for status - it's only rusage that goes as a kernel one (and needs a copyout afterwards) [ Also, fix the prototype of kernel_wait4() to have that __user annotation - Linus ] Fixes: 92ebce5ac55d ("osf_wait4: switch to kernel_wait4()") Cc: stable@kernel.org # v4.13+ Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/alpha/kernel/osf_sys.c | 5 +---- include/linux/sched/task.h | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 6e921754c8fc..c210a25dd6da 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1180,13 +1180,10 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, struct rusage32 __user *, ur) { - unsigned int status = 0; struct rusage r; - long err = kernel_wait4(pid, &status, options, &r); + long err = kernel_wait4(pid, ustatus, options, &r); if (err <= 0) return err; - if (put_user(status, ustatus)) - return -EFAULT; if (!ur) return err; if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime)) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 5be31eb7b266..108ede99e533 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -75,7 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern long kernel_wait4(pid_t, int *, int, struct rusage *); +extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); extern void free_task(struct task_struct *tsk); -- cgit v1.2.3 From 2fba3573f1cf876ad94992c256c5c410039e60b4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Jul 2018 07:55:18 +0200 Subject: s390: disable gcc plugins The s390 build currently fails with the latent entropy plugin: arch/s390/kernel/als.o: In function `verify_facilities': als.c:(.init.text+0x24): undefined reference to `latent_entropy' als.c:(.init.text+0xae): undefined reference to `latent_entropy' make[3]: *** [arch/s390/boot/compressed/vmlinux] Error 1 make[2]: *** [arch/s390/boot/compressed/vmlinux] Error 2 make[1]: *** [bzImage] Error 2 This will be fixed with the early boot rework from Vasily, which is planned for the 4.19 merge window. For 4.18 the simplest solution is to disable the gcc plugins and reenable them after the early boot rework is upstream. Reported-by: Guenter Roeck Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e44bb2b2873e..8a1863d9ed53 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,7 +140,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_GCC_PLUGINS + select HAVE_GCC_PLUGINS if BROKEN select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 -- cgit v1.2.3 From d2753e6b4882a637a0e8fb3b9c2e15f33265300e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Jul 2018 10:39:07 +0200 Subject: perf/x86/amd/ibs: Don't access non-started event Paul Menzel reported the following bug: > Enabling the undefined behavior sanitizer and building GNU/Linux 4.18-rc5+ > (with some unrelated commits) with GCC 8.1.0 from Debian Sid/unstable, the > warning below is shown. > > > [ 2.111913] > > ================================================================================ > > [ 2.111917] UBSAN: Undefined behaviour in arch/x86/events/amd/ibs.c:582:24 > > [ 2.111919] member access within null pointer of type 'struct perf_event' > > [ 2.111926] CPU: 0 PID: 144 Comm: udevadm Not tainted 4.18.0-rc5-00316-g4864b68cedf2 #104 > > [ 2.111928] Hardware name: ASROCK E350M1/E350M1, BIOS TIMELESS 01/01/1970 > > [ 2.111930] Call Trace: > > [ 2.111943] dump_stack+0x55/0x89 > > [ 2.111949] ubsan_epilogue+0xb/0x33 > > [ 2.111953] handle_null_ptr_deref+0x7f/0x90 > > [ 2.111958] __ubsan_handle_type_mismatch_v1+0x55/0x60 > > [ 2.111964] perf_ibs_handle_irq+0x596/0x620 The code dereferences event before checking the STARTED bit. Patch below should cure the issue. The warning should not trigger, if I analyzed the thing correctly. (And Paul's testing confirms this.) Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Paul Menzel Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807200958390.1580@nanos.tec.linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/amd/ibs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 4b98101209a1..d50bb4dc0650 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -579,7 +579,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); struct perf_event *event = pcpu->event; - struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event *hwc; struct perf_sample_data data; struct perf_raw_record raw; struct pt_regs regs; @@ -602,6 +602,10 @@ fail: return 0; } + if (WARN_ON_ONCE(!event)) + goto fail; + + hwc = &event->hw; msr = hwc->config_base; buf = ibs_data.regs; rdmsrl(msr, *buf); -- cgit v1.2.3 From d9e6dbcf28f383bf08e6a3180972f5722e514a54 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jul 2018 17:19:19 -0400 Subject: x86/apic: Future-proof the TSC_DEADLINE quirk for SKX All SKX with stepping higher than 4 support the TSC_DEADLINE, no matter the microcode version. Without this patch, upcoming SKX steppings will not be able to use their TSC_DEADLINE timer. Signed-off-by: Len Brown Cc: # v4.14+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 616dd5872e ("x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping") Link: http://lkml.kernel.org/r/d0c7129e509660be9ec6b233284b8d42d90659e8.1532207856.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2aabd4cb0e3f..adbda5847b14 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void) case 0x04: return 0x02000014; } + if (boot_cpu_data.x86_stepping > 4) + return 0; + return ~0U; } -- cgit v1.2.3 From b3681dd548d06deb2e1573890829dff4b15abf46 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 22 Jul 2018 11:05:09 -0700 Subject: x86/entry/64: Remove %ebx handling from error_entry/exit error_entry and error_exit communicate the user vs. kernel status of the frame using %ebx. This is unnecessary -- the information is in regs->cs. Just use regs->cs. This makes error_entry simpler and makes error_exit more robust. It also fixes a nasty bug. Before all the Spectre nonsense, the xen_failsafe_callback entry point returned like this: ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS ENCODE_FRAME_POINTER jmp error_exit And it did not go through error_entry. This was bogus: RBX contained garbage, and error_exit expected a flag in RBX. Fortunately, it generally contained *nonzero* garbage, so the correct code path was used. As part of the Spectre fixes, code was added to clear RBX to mitigate certain speculation attacks. Now, depending on kernel configuration, RBX got zeroed and, when running some Wine workloads, the kernel crashes. This was introduced by: commit 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") With this patch applied, RBX is no longer needed as a flag, and the problem goes away. I suspect that malicious userspace could use this bug to crash the kernel even without the offending patch applied, though. [ Historical note: I wrote this patch as a cleanup before I was aware of the bug it fixed. ] [ Note to stable maintainers: this should probably get applied to all kernels. If you're nervous about that, a more conservative fix to add xorl %ebx,%ebx; incl %ebx before the jump to error_exit should also fix the problem. ] Reported-and-tested-by: M. Vefa Bicakci Signed-off-by: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Dominik Brodowski Cc: Greg KH Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Cc: xen-devel@lists.xenproject.org Fixes: 3ac6d8c787b8 ("x86/entry/64: Clear registers for exceptions/interrupts, to reduce speculation attack surface") Link: http://lkml.kernel.org/r/b5010a090d3586b2d6e06c7ad3ec5542d1241c45.1532282627.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 73a522d53b53..8ae7ffda8f98 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -981,7 +981,7 @@ ENTRY(\sym) call \do_sym - jmp error_exit /* %ebx: no swapgs flag */ + jmp error_exit .endif END(\sym) .endm @@ -1222,7 +1222,6 @@ END(paranoid_exit) /* * Save all registers in pt_regs, and switch GS if needed. - * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) UNWIND_HINT_FUNC @@ -1269,7 +1268,6 @@ ENTRY(error_entry) * for these here too. */ .Lerror_kernelspace: - incl %ebx leaq native_irq_return_iret(%rip), %rcx cmpq %rcx, RIP+8(%rsp) je .Lerror_bad_iret @@ -1303,28 +1301,20 @@ ENTRY(error_entry) /* * Pretend that the exception came from user mode: set up pt_regs - * as if we faulted immediately after IRET and clear EBX so that - * error_exit knows that we will be returning to user mode. + * as if we faulted immediately after IRET. */ mov %rsp, %rdi call fixup_bad_iret mov %rax, %rsp - decl %ebx jmp .Lerror_entry_from_usermode_after_swapgs END(error_entry) - -/* - * On entry, EBX is a "return to kernel mode" flag: - * 1: already in kernel mode, don't need SWAPGS - * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode - */ ENTRY(error_exit) UNWIND_HINT_REGS DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF - testl %ebx, %ebx - jnz retint_kernel + testb $3, CS(%rsp) + jz retint_kernel jmp retint_user END(error_exit) -- cgit v1.2.3 From c0dc373a780f4ec63e45a573b9551763abd8cd1a Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 17 Jul 2018 16:16:00 -0400 Subject: locking/pvqspinlock/x86: Use LOCK_PREFIX in __pv_queued_spin_unlock() assembly code The LOCK_PREFIX macro should be used in the __raw_callee_save___pv_queued_spin_unlock() assembly code, so that the lock prefix can be patched out on UP systems. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Joe Mario Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1531858560-21547-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 9ef5ee03d2d7..159622ee0674 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -43,7 +43,7 @@ asm (".pushsection .text;" "push %rdx;" "mov $0x1,%eax;" "xor %edx,%edx;" - "lock cmpxchg %dl,(%rdi);" + LOCK_PREFIX "cmpxchg %dl,(%rdi);" "cmp $0x1,%al;" "jne .slowpath;" "pop %rdx;" -- cgit v1.2.3 From 6cbc304f2f360f25cc8607817239d6f4a2fd3dc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 May 2018 15:48:41 +0200 Subject: perf/x86/intel: Fix unwind errors from PEBS entries (mk-II) Vince reported the perf_fuzzer giving various unwinder warnings and Josh reported: > Deja vu. Most of these are related to perf PEBS, similar to the > following issue: > > b8000586c90b ("perf/x86/intel: Cure bogus unwind from PEBS entries") > > This is basically the ORC version of that. setup_pebs_sample_data() is > assembling a franken-pt_regs which ORC isn't happy about. RIP is > inconsistent with some of the other registers (like RSP and RBP). And where the previous unwinder only needed BP,SP ORC also requires IP. But we cannot spoof IP because then the sample will get displaced, entirely negating the point of PEBS. So cure the whole thing differently by doing the unwind early; this does however require a means to communicate we did the unwind early. We (ab)use an unused sample_type bit for this, which we set on events that fill out the data->callchain before the normal perf_prepare_sample(). Debugged-by: Josh Poimboeuf Reported-by: Vince Weaver Tested-by: Josh Poimboeuf Tested-by: Prashant Bhole Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 3 +++ arch/x86/events/intel/ds.c | 25 +++++++++++-------------- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 2 ++ kernel/events/core.c | 6 ++++-- 5 files changed, 21 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 707b2a96e516..86f0c15dcc2d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2997,6 +2997,9 @@ static int intel_pmu_hw_config(struct perf_event *event) } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); + + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; } if (needs_branch_stack(event)) { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8cf03f101938..8dbba77e0518 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1185,17 +1185,21 @@ static void setup_pebs_sample_data(struct perf_event *event, data->data_src.val = val; } + /* + * We must however always use iregs for the unwinder to stay sane; the + * record BP,SP,IP can point into thin air when the record is from a + * previous PMI context or an (I)RET happend between the record and + * PMI. + */ + if (sample_type & PERF_SAMPLE_CALLCHAIN) + data->callchain = perf_callchain(event, iregs); + /* * We use the interrupt regs as a base because the PEBS record does not * contain a full regs set, specifically it seems to lack segment * descriptors, which get used by things like user_mode(). * * In the simple case fix up only the IP for PERF_SAMPLE_IP. - * - * We must however always use BP,SP from iregs for the unwinder to stay - * sane; the record BP,SP can point into thin air when the record is - * from a previous PMI context or an (I)RET happend between the record - * and PMI. */ *regs = *iregs; @@ -1214,15 +1218,8 @@ static void setup_pebs_sample_data(struct perf_event *event, regs->si = pebs->si; regs->di = pebs->di; - /* - * Per the above; only set BP,SP if we don't need callchains. - * - * XXX: does this make sense? - */ - if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { - regs->bp = pebs->bp; - regs->sp = pebs->sp; - } + regs->bp = pebs->bp; + regs->sp = pebs->sp; #ifndef CONFIG_X86_32 regs->r8 = pebs->r8; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1fa12887ec02..87f6db437e4a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1130,6 +1130,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); +extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b8e288a1f740..eeb787b1c53c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -143,6 +143,8 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ + + __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 8f0434a9951a..cdb32cf8e33c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6343,7 +6343,7 @@ static u64 perf_virt_to_phys(u64 virt) static struct perf_callchain_entry __empty_callchain = { .nr = 0, }; -static struct perf_callchain_entry * +struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel = !event->attr.exclude_callchain_kernel; @@ -6382,7 +6382,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_CALLCHAIN) { int size = 1; - data->callchain = perf_callchain(event, regs); + if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY)) + data->callchain = perf_callchain(event, regs); + size += data->callchain->nr; header->size += size * sizeof(u64); -- cgit v1.2.3 From 92a4728608a8fd228c572bc8ff50dd98aa0ddf2a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jul 2018 16:08:27 -0700 Subject: x86/boot: Fix if_changed build flip/flop bug Dirk Gouders reported that two consecutive "make" invocations on an already compiled tree will show alternating behaviors: $ make CALL scripts/checksyscalls.sh DESCEND objtool CHK include/generated/compile.h DATAREL arch/x86/boot/compressed/vmlinux Kernel: arch/x86/boot/bzImage is ready (#48) Building modules, stage 2. MODPOST 165 modules $ make CALL scripts/checksyscalls.sh DESCEND objtool CHK include/generated/compile.h LD arch/x86/boot/compressed/vmlinux ZOFFSET arch/x86/boot/zoffset.h AS arch/x86/boot/header.o LD arch/x86/boot/setup.elf OBJCOPY arch/x86/boot/setup.bin OBJCOPY arch/x86/boot/vmlinux.bin BUILD arch/x86/boot/bzImage Setup is 15644 bytes (padded to 15872 bytes). System is 6663 kB CRC 3eb90f40 Kernel: arch/x86/boot/bzImage is ready (#48) Building modules, stage 2. MODPOST 165 modules He bisected it back to: commit 98f78525371b ("x86/boot: Refuse to build with data relocations") The root cause was the use of the "if_changed" kbuild function multiple times for the same target. It was designed to only be used once per target, otherwise it will effectively always trigger, flipping back and forth between the two commands getting recorded by "if_changed". Instead, this patch merges the two commands into a single function to get stable build artifacts (i.e. .vmlinux.cmd), and a single build behavior. Bisected-and-Reported-by: Dirk Gouders Fix-Suggested-by: Masahiro Yamada Signed-off-by: Kees Cook Reviewed-by: Masahiro Yamada Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180724230827.GA37823@beast Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fa42f895fdde..169c2feda14a 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -106,9 +106,13 @@ define cmd_check_data_rel done endef +# We need to run two commands under "if_changed", so merge them into a +# single invocation. +quiet_cmd_check-and-link-vmlinux = LD $@ + cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld) + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE - $(call if_changed,check_data_rel) - $(call if_changed,ld) + $(call if_changed,check-and-link-vmlinux) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE -- cgit v1.2.3 From dc0e36581eb2da1aa3c63ceeff0f10ef1e899b2a Mon Sep 17 00:00:00 2001 From: Dirk Mueller Date: Wed, 25 Jul 2018 13:10:28 +0200 Subject: arm64: Check for errata before evaluating cpu features Since commit d3aec8a28be3b8 ("arm64: capabilities: Restrict KPTI detection to boot-time CPUs") we rely on errata flags being already populated during feature enumeration. The order of errata and features was flipped as part of commit ed478b3f9e4a ("arm64: capabilities: Group handling of features and errata workarounds"). Return to the orginal order of errata and feature evaluation to ensure errata flags are present during feature evaluation. Fixes: ed478b3f9e4a ("arm64: capabilities: Group handling of features and errata workarounds") CC: Suzuki K Poulose CC: Marc Zyngier Signed-off-by: Dirk Mueller Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f24892a40d2c..c6d80743f4ed 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1351,9 +1351,9 @@ static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void update_cpu_capabilities(u16 scope_mask) { - __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); __update_cpu_capabilities(arm64_errata, scope_mask, "enabling workaround for"); + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); } static int __enable_cpu_capability(void *arg) @@ -1408,8 +1408,8 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, static void __init enable_cpu_capabilities(u16 scope_mask) { - __enable_cpu_capabilities(arm64_features, scope_mask); __enable_cpu_capabilities(arm64_errata, scope_mask); + __enable_cpu_capabilities(arm64_features, scope_mask); } /* -- cgit v1.2.3 From 7b0eb6b41a08fa1fa0d04b1c53becd62b5fbfaee Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 23 Jul 2018 10:18:23 -0400 Subject: arm64: fix vmemmap BUILD_BUG_ON() triggering on !vmemmap setups Arnd reports the following arm64 randconfig build error with the PSI patches that add another page flag: /git/arm-soc/arch/arm64/mm/init.c: In function 'mem_init': /git/arm-soc/include/linux/compiler.h:357:38: error: call to '__compiletime_assert_618' declared with attribute error: BUILD_BUG_ON failed: sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT) The additional page flag causes other information stored in page->flags to get bumped into their own struct page member: #if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif #if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0 #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif which in turn causes the struct page size to exceed the size set in STRUCT_PAGE_MAX_SHIFT. This value is an an estimate used to size the VMEMMAP page array according to address space and struct page size. However, the check is performed - and triggers here - on a !VMEMMAP config, which consumes an additional 22 page bits for the sparse section id. When VMEMMAP is enabled, those bits are returned, cpupid doesn't need its own member, and the page passes the VMEMMAP check. Restrict that check to the situation it was meant to check: that we are sizing the VMEMMAP page array correctly. Says Arnd: Further experiments show that the build error already existed before, but was only triggered with larger values of CONFIG_NR_CPU and/or CONFIG_NODES_SHIFT that might be used in actual configurations but not in randconfig builds. With longer CPU and node masks, I could recreate the problem with kernels as old as linux-4.7 when arm64 NUMA support got added. Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Cc: stable@vger.kernel.org Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Fixes: 3e1907d5bf5a ("arm64: mm: move vmemmap region right below the linear region") Signed-off-by: Johannes Weiner Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 325cfb3b858a..9abf8a1e7b25 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -611,11 +611,13 @@ void __init mem_init(void) BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64); #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Make sure we chose the upper bound of sizeof(struct page) - * correctly. + * correctly when sizing the VMEMMAP array. */ BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT)); +#endif if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { extern int sysctl_overcommit_memory; -- cgit v1.2.3 From 9e4e5b5c86661e767f3074bedbbf2ca59f8fbd18 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Thu, 26 Jul 2018 00:46:11 +0800 Subject: bpf, x32: Fix regression caused by commit 24dea04767e6 Commit 24dea04767e6 ("bpf, x32: remove ld_abs/ld_ind") removed the 4 /* Extra space for skb_copy_bits buffer */ from _STACK_SIZE, but it didn't fix the concerned code in emit_prologue and emit_epilogue, and this error will bring very strange kernel runtime errors. This patch fixes it. Fixes: 24dea04767e6 ("bpf, x32: remove ld_abs/ld_ind") Reported-by: Meelis Roos Bisected-by: Meelis Roos Signed-off-by: Wang YanQing Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 55799873ebe5..8f6cc71e0848 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1441,8 +1441,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth) /* sub esp,STACK_SIZE */ EMIT2_off32(0x81, 0xEC, STACK_SIZE); - /* sub ebp,SCRATCH_SIZE+4+12*/ - EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16); + /* sub ebp,SCRATCH_SIZE+12*/ + EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12); /* xor ebx,ebx */ EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX)); @@ -1475,8 +1475,8 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth) /* mov edx,dword ptr [ebp+off]*/ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1])); - /* add ebp,SCRATCH_SIZE+4+12*/ - EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16); + /* add ebp,SCRATCH_SIZE+12*/ + EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12); /* mov ebx,dword ptr [ebp-12]*/ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12); -- cgit v1.2.3 From 2c4541e24c55e2847bede93e33d749280edd429a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 26 Jul 2018 16:37:30 -0700 Subject: mm: use vma_init() to initialize VMAs on stack and data segments Make sure to initialize all VMAs properly, not only those which come from vm_area_cachep. Link: http://lkml.kernel.org/r/20180724121139.62570-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Reviewed-by: Andrew Morton Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/process.c | 1 + arch/arm/mach-rpc/ecard.c | 2 +- arch/arm64/include/asm/tlb.h | 4 +++- arch/arm64/mm/hugetlbpage.c | 7 +++++-- arch/ia64/include/asm/tlb.h | 2 +- arch/ia64/mm/init.c | 2 +- arch/x86/um/mem_32.c | 2 +- fs/hugetlbfs/inode.c | 2 ++ mm/mempolicy.c | 1 + mm/shmem.c | 1 + 10 files changed, 17 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 225d1c58d2de..d9c299133111 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -338,6 +338,7 @@ static struct vm_area_struct gate_vma = { static int __init gate_vma_init(void) { + vma_init(&gate_vma, NULL); gate_vma.vm_page_prot = PAGE_READONLY_EXEC; return 0; } diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 39aef4876ed4..8db62cc54a6a 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -237,8 +237,8 @@ static void ecard_init_pgtables(struct mm_struct *mm) memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE)); + vma_init(&vma, mm); vma.vm_flags = VM_EXEC; - vma.vm_mm = mm; flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE); flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE); diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index ffdaea7954bb..d87f2d646caa 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,7 +37,9 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + struct vm_area_struct vma; + + vma_init(&vma, tlb->mm); /* * The ASID allocator will either invalidate the ASID or mark diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index ecc6818191df..1854e49aa18a 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -108,11 +108,13 @@ static pte_t get_clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma = { .vm_mm = mm }; + struct vm_area_struct vma; pte_t orig_pte = huge_ptep_get(ptep); bool valid = pte_valid(orig_pte); unsigned long i, saddr = addr; + vma_init(&vma, mm); + for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { pte_t pte = ptep_get_and_clear(mm, addr, ptep); @@ -145,9 +147,10 @@ static void clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma = { .vm_mm = mm }; + struct vm_area_struct vma; unsigned long i, saddr = addr; + vma_init(&vma, mm); for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) pte_clear(mm, addr, ptep); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 44f0ac0df308..db89e7306081 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -120,7 +120,7 @@ ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned */ struct vm_area_struct vma; - vma.vm_mm = tlb->mm; + vma_init(&vma, tlb->mm); /* flush the address range from the tlb: */ flush_tlb_range(&vma, start, end); /* now flush the virt. page-table area mapping the address range: */ diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index bdb14a369137..e6c6dfd98de2 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -273,7 +273,7 @@ static struct vm_area_struct gate_vma; static int __init gate_vma_init(void) { - gate_vma.vm_mm = NULL; + vma_init(&gate_vma, NULL); gate_vma.vm_start = FIXADDR_USER_START; gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c index 744afdc18cf3..56c44d865f7b 100644 --- a/arch/x86/um/mem_32.c +++ b/arch/x86/um/mem_32.c @@ -16,7 +16,7 @@ static int __init gate_vma_init(void) if (!FIXADDR_USER_START) return 0; - gate_vma.vm_mm = NULL; + vma_init(&gate_vma, NULL); gate_vma.vm_start = FIXADDR_USER_START; gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d508c7844681..40d4c66c7751 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -411,6 +411,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, bool truncate_op = (lend == LLONG_MAX); memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, current->mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pagevec_init(&pvec); next = start; @@ -595,6 +596,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, * as input to create an allocation policy. */ memset(&pseudo_vma, 0, sizeof(struct vm_area_struct)); + vma_init(&pseudo_vma, mm); pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED); pseudo_vma.vm_file = file; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9ac49ef17b4e..01f1a14facc4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2505,6 +2505,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) /* Create pseudo-vma that contains just the policy */ memset(&pvma, 0, sizeof(struct vm_area_struct)); + vma_init(&pvma, NULL); pvma.vm_end = TASK_SIZE; /* policy covers entire file */ mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ diff --git a/mm/shmem.c b/mm/shmem.c index 2cab84403055..41b9bbf24e16 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1421,6 +1421,7 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma, { /* Create a pseudo vma that just contains the policy */ memset(vma, 0, sizeof(*vma)); + vma_init(vma, NULL); /* Bias interleave by inode number to distribute better across nodes */ vma->vm_pgoff = index + info->vfs_inode.i_ino; vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index); -- cgit v1.2.3 From d97e5e6160c0e0a23963ec198c7cb1c69e6bf9e8 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 26 Jul 2018 16:37:45 -0700 Subject: kvm, mm: account shadow page tables to kmemcg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of kvm's shadow page tables corresponds to the size of the guest virtual machines on the system. Large VMs can spend a significant amount of memory as shadow page tables which can not be left as system memory overhead. So, account shadow page tables to the kmemcg. [shakeelb@google.com: replace (GFP_KERNEL|__GFP_ACCOUNT) with GFP_KERNEL_ACCOUNT] Link: http://lkml.kernel.org/r/20180629140224.205849-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20180627181349.149778-1-shakeelb@google.com Signed-off-by: Shakeel Butt Cc: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Paolo Bonzini Cc: Greg Thelen Cc: Radim Krčmář Cc: Peter Feiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d594690d8b95..6b8f11521c41 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -890,7 +890,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, if (cache->nobjs >= min) return 0; while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - page = (void *)__get_free_page(GFP_KERNEL); + page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT); if (!page) return -ENOMEM; cache->objects[cache->nobjs++] = page; -- cgit v1.2.3 From 58d1131777a4b7c228267b809bd88f7be66edcfb Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 20 Jul 2018 11:02:23 -0700 Subject: iommu: Add config option to set passthrough as default This allows the default behavior to be controlled by a kernel config option instead of changing the commandline for the kernel to include "iommu.passthrough=on" or "iommu=pt" on machines where this is desired. Likewise, for machines where this config option is enabled, it can be disabled at boot time with "iommu.passthrough=off" or "iommu=nopt". Also corrected iommu=pt documentation for IA-64, since it has no code that parses iommu= at all. Signed-off-by: Olof Johansson Signed-off-by: Joerg Roedel --- Documentation/admin-guide/kernel-parameters.txt | 3 ++- arch/x86/kernel/pci-dma.c | 8 ++++++++ drivers/iommu/Kconfig | 11 +++++++++++ drivers/iommu/iommu.c | 4 ++++ 4 files changed, 25 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..d79c14d0b1f0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1716,7 +1716,8 @@ merge nomerge soft - pt [x86, IA-64] + pt [x86] + nopt [x86] nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index ab5d9dd668d2..0acb135de7fb 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -40,8 +40,14 @@ int iommu_detected __read_mostly = 0; * devices and allow every device to access to whole physical memory. This is * useful if a user wants to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation. + * It is also possible to disable by default in kernel config, and enable with + * iommu=nopt at boot time. */ +#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH +int iommu_pass_through __read_mostly = 1; +#else int iommu_pass_through __read_mostly; +#endif extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; @@ -135,6 +141,8 @@ static __init int iommu_setup(char *p) #endif if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; + if (!strncmp(p, "nopt", 4)) + iommu_pass_through = 0; gart_parse_options(p); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 8d0a2886658f..08c957759e74 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -70,6 +70,17 @@ config IOMMU_DEBUGFS debug/iommu directory, and then populate a subdirectory with entries as required. +config IOMMU_DEFAULT_PASSTHROUGH + bool "IOMMU passthrough by default" + depends on IOMMU_API + help + Enable passthrough by default, removing the need to pass in + iommu.passthrough=on or iommu=pt through command line. If this + is enabled, you can still disable with iommu.passthrough=off + or iommu=nopt depending on the architecture. + + If unsure, say N here. + config IOMMU_IOVA tristate diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7f50013b0bcf..f3698006cb53 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -36,7 +36,11 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH +static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; +#else static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; +#endif struct iommu_callback_data { const struct iommu_ops *ops; -- cgit v1.2.3 From d5ea019f8a381f88545bb26993b62ec24a2796b7 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 27 Jul 2018 13:13:39 +0200 Subject: Revert "MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2a027b47dba6 ("MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum"). Enabling ExternalSync caused a regression for BCM4718A1 (used e.g. in Netgear E3000 and ASUS RT-N16): it simply hangs during PCIe initialization. It's likely that BCM4717A1 is also affected. I didn't notice that earlier as the only BCM47XX devices with PCIe I own are: 1) BCM4706 with 2 x 14e4:4331 2) BCM4706 with 14e4:4360 and 14e4:4331 it appears that BCM4706 is unaffected. While BCM5300X-ES300-RDS.pdf seems to document that erratum and its workarounds (according to quotes provided by Tokunori) it seems not even Broadcom follows them. According to the provided info Broadcom should define CONF7_ES in their SDK's mipsinc.h and implement workaround in the si_mips_init(). Checking both didn't reveal such code. It *could* mean Broadcom also had some problems with the given workaround. Signed-off-by: Rafał Miłecki Signed-off-by: Paul Burton Reported-by: Michael Marley Patchwork: https://patchwork.linux-mips.org/patch/20032/ URL: https://bugs.openwrt.org/index.php?do=details&task_id=1688 Cc: Tokunori Ikegami Cc: Hauke Mehrtens Cc: Chris Packham Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org --- arch/mips/bcm47xx/setup.c | 6 ------ arch/mips/include/asm/mipsregs.h | 3 --- 2 files changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 8c9cbf13d32a..6054d49e608e 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -212,12 +212,6 @@ static int __init bcm47xx_cpu_fixes(void) */ if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706) cpu_wait = NULL; - - /* - * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail" - * Enable ExternalSync for sync instruction to take effect - */ - set_c0_config7(MIPS_CONF7_ES); break; #endif } diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 0bc270806ec5..ae461d91cd1f 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -681,8 +681,6 @@ #define MIPS_CONF7_WII (_ULCAST_(1) << 31) #define MIPS_CONF7_RPS (_ULCAST_(1) << 2) -/* ExternalSync */ -#define MIPS_CONF7_ES (_ULCAST_(1) << 8) #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) @@ -2767,7 +2765,6 @@ __BUILD_SET_C0(status) __BUILD_SET_C0(cause) __BUILD_SET_C0(config) __BUILD_SET_C0(config5) -__BUILD_SET_C0(config7) __BUILD_SET_C0(intcontrol) __BUILD_SET_C0(intctl) __BUILD_SET_C0(srsmap) -- cgit v1.2.3 From 4c612add7b18844ddd733ebdcbe754520155999b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 24 Jul 2018 17:13:02 +0300 Subject: ARC: dma [non IOC]: fix arc_dma_sync_single_for_(device|cpu) ARC backend for dma_sync_single_for_(device|cpu) was broken as it was not honoring the @dir argument and simply forcing it based on the call: - arc_dma_sync_single_for_device(dir) assumed DMA_TO_DEVICE (cache wback) - arc_dma_sync_single_for_cpu(dir) assumed DMA_FROM_DEVICE (cache inv) This is not true given the DMA API programming model and has been discussed here [1] in some detail. Interestingly while the deficiency has been there forever, it only started showing up after 4.17 dma common ops rework, commit a8eb92d02dd7 ("arc: fix arc_dma_{map,unmap}_page") which wired up these calls under the more commonly used dma_map_page API triggering the issue. [1]: https://lkml.org/lkml/2018/5/18/979 Fixes: commit a8eb92d02dd7 ("arc: fix arc_dma_{map,unmap}_page") Cc: stable@kernel.org # v4.17+ Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: reworked changelog] --- arch/arc/mm/dma.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 8c1071840979..ec47e6079f5d 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -129,14 +129,59 @@ int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } +/* + * Cache operations depending on function and direction argument, inspired by + * https://lkml.org/lkml/2018/5/18/979 + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] + * dma-mapping: provide a generic dma-noncoherent implementation)" + * + * | map == for_device | unmap == for_cpu + * |---------------------------------------------------------------- + * TO_DEV | writeback writeback | none none + * FROM_DEV | invalidate invalidate | invalidate* invalidate* + * BIDIR | writeback+inv writeback+inv | invalidate invalidate + * + * [*] needed for CPU speculative prefetches + * + * NOTE: we don't check the validity of direction argument as it is done in + * upper layer functions (in include/linux/dma-mapping.h) + */ + void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_wback(paddr, size); + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback(paddr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(paddr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(paddr, size); + break; + + default: + break; + } } void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_cache_inv(paddr, size); + switch (dir) { + case DMA_TO_DEVICE: + break; + + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv(paddr, size); + break; + + default: + break; + } } -- cgit v1.2.3 From eb2777397fd83a4a7eaa26984d09d3babb845d2a Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:43 +0300 Subject: ARC: dma [non-IOC] setup SMP_CACHE_BYTES and cache_line_size As for today we don't setup SMP_CACHE_BYTES and cache_line_size for ARC, so they are set to L1_CACHE_BYTES by default. L1 line length (L1_CACHE_BYTES) might be easily smaller than L2 line (which is usually the case BTW). This breaks code. For example this breaks ethernet infrastructure on HSDK/AXS103 boards with IOC disabled, involving manual cache flushes Functions which alloc and manage sk_buff packet data area rely on SMP_CACHE_BYTES define. In the result we can share last L2 cache line in sk_buff linear packet data area between DMA buffer and some useful data in other structure. So we can lose this data when we invalidate DMA buffer. sk_buff linear packet data area | | | skb->end skb->tail V | | V V ----------------------------------------------. packet data | | ----------------------------------------------. ---------------------.--------------------------------------------------. SLC line | SLC (L2 cache) line (128B) | ---------------------.--------------------------------------------------. ^ ^ | | These cache lines will be invalidated when we invalidate skb linear packet data area before DMA transaction starting. This leads to issues painful to debug as it reproduces only if (sk_buff->end - sk_buff->tail) < SLC_LINE_SIZE and if we have some useful data right after sk_buff->end. Fix that by hardcode SMP_CACHE_BYTES to max line length we may have. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 3 +++ arch/arc/include/asm/cache.h | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9cf59fc60eab..5151d81476a1 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -50,6 +50,9 @@ config ARC select HAVE_KERNEL_LZMA select ARCH_HAS_PTE_SPECIAL +config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + config MIGHT_HAVE_PCI bool diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index 8486f328cc5d..ff7d3232764a 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -48,7 +48,9 @@ }) /* Largest line length for either L1 or L2 is 128 bytes */ -#define ARCH_DMA_MINALIGN 128 +#define SMP_CACHE_BYTES 128 +#define cache_line_size() SMP_CACHE_BYTES +#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); -- cgit v1.2.3 From afc9f65e01cd114cb2cedf544d22239116ce0cc6 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 13 Jul 2018 11:12:22 +0100 Subject: ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+ When building the kernel as Thumb-2 with binutils 2.29 or newer, if the assembler has seen the .type directive (via ENDPROC()) for a symbol, it automatically handles the setting of the lowest bit when the symbol is used with ADR. The badr macro on the other hand handles this lowest bit manually. This leads to a jump to a wrong address in the wrong state in the syscall return path: Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2 Modules linked in: CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8 PC is at ret_fast_syscall+0x4/0x62 LR is at sys_brk+0x109/0x128 pc : [<80101004>] lr : [<801c8a35>] psr: 60000013 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 50c5387d Table: 9e82006a DAC: 00000051 Process modprobe (pid: 652, stack limit = 0x(ptrval)) 80101000 : 80101000: b672 cpsid i 80101002: f8d9 2008 ldr.w r2, [r9, #8] 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000 80101184 : 80101184: f8d9 a000 ldr.w sl, [r9] 80101188: e92d 0030 stmdb sp!, {r4, r5} 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0 80101190: d117 bne.n 801011c2 <__sys_trace> 80101192: 46ba mov sl, r7 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190 80101198: bf28 it cs 8010119a: f04f 0a00 movcs.w sl, #0 8010119e: f3af 8014 nop.w {20} 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2 To fix this, add a new symbol name which doesn't have ENDPROC used on it and use that with badr. We can't remove the badr usage since that would would cause breakage with older binutils. Signed-off-by: Vincent Whitchurch Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 106a1466518d..746565a876dc 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -48,6 +48,7 @@ saved_pc .req lr * from those features make this path too inefficient. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts @@ -78,6 +79,7 @@ fast_work_pending: * call. */ ret_fast_syscall: +__ret_fast_syscall: UNWIND(.fnstart ) UNWIND(.cantunwind ) str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 @@ -255,7 +257,7 @@ local_restart: tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace - invoke_syscall tbl, scno, r10, ret_fast_syscall + invoke_syscall tbl, scno, r10, __ret_fast_syscall add r1, sp, #S_OFF 2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) -- cgit v1.2.3 From 386177da9e601ed176d54c04324d9ebf44c70620 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 26 Jul 2018 16:15:44 +0300 Subject: ARC: add SMP_CACHE_BYTES value validate Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger or equal to any cache line length by comparing it with values previously read from ARC cache BCR registers. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 9dbe645ee127..b95365e1253a 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1246,6 +1246,16 @@ void __init arc_cache_init_master(void) } } + /* + * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger + * or equal to any cache line length. + */ + BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES, + "SMP_CACHE_BYTES must be >= any cache line length"); + if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES)) + panic("L2 Cache line [%d] > kernel Config [%d]\n", + l2_line_sz, SMP_CACHE_BYTES); + /* Note that SLC disable not formally supported till HS 3.0 */ if (is_isa_arcv2() && l2_line_sz && !slc_enable) arc_slc_disable(); -- cgit v1.2.3 From 05b466bf846d2e8d2f0baf8dfd81a42cc933e237 Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Sat, 28 Jul 2018 10:54:41 +0300 Subject: ARC: [plat-eznps] Add missing struct nps_host_reg_aux_dpc Fixing compilation issue caused by missing struct nps_host_reg_aux_dpc definition. Fixes: 3f9cd874dcc87 ("ARC: [plat-eznps] avoid toggling of DPC register") Reported-by: Randy Dunlap Signed-off-by: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index 0c7d11022d0f..bd34b96bc591 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -143,6 +143,15 @@ struct nps_host_reg_gim_p_int_dst { }; /* AUX registers definition */ +struct nps_host_reg_aux_dpc { + union { + struct { + u32 ien:1, men:1, hen:1, reserved:29; + }; + u32 value; + }; +}; + struct nps_host_reg_aux_udmc { union { struct { -- cgit v1.2.3 From b1f32ce1c3d2c11959b7e6a2c58dc5197c581966 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Jul 2018 11:10:51 -0700 Subject: arc: [plat-eznps] fix data type errors in platform headers Add to fix build errors. Both ctop.h and use u32 types and cause many errors. Examples: ../include/soc/nps/common.h:71:4: error: unknown type name 'u32' u32 __reserved:20, cluster:4, core:4, thread:4; ../include/soc/nps/common.h:76:3: error: unknown type name 'u32' u32 value; ../include/soc/nps/common.h:124:4: error: unknown type name 'u32' u32 base:8, cl_x:4, cl_y:4, ../include/soc/nps/common.h:127:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:83:4: error: unknown type name 'u32' u32 gen:1, gdis:1, clk_gate_dis:1, asb:1, ../arch/arc/plat-eznps/include/plat/ctop.h:86:3: error: unknown type name 'u32' u32 value; ../arch/arc/plat-eznps/include/plat/ctop.h:93:4: error: unknown type name 'u32' u32 csa:22, dmsid:6, __reserved:3, cs:1; ../arch/arc/plat-eznps/include/plat/ctop.h:95:3: error: unknown type name 'u32' u32 value; Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/include/plat/ctop.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index bd34b96bc591..4f6a1673b3a6 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -21,6 +21,7 @@ #error "Incorrect ctop.h include" #endif +#include #include /* core auxiliary registers */ -- cgit v1.2.3 From 9e2ea405543d9ddfe05b351f1679e53bd9c11f80 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: [plat-eznps] fix printk warning in arc/plat-eznps/mtm.c Fix printk format warning in arch/arc/plat-eznps/mtm.c: In file included from ../include/linux/printk.h:7, from ../include/linux/kernel.h:14, from ../include/linux/list.h:9, from ../include/linux/smp.h:12, from ../arch/arc/plat-eznps/mtm.c:17: ../arch/arc/plat-eznps/mtm.c: In function 'set_mtm_hs_ctr': ../include/linux/kern_levels.h:5:18: warning: format '%d' expects argument of type 'int', but argument 2 has type 'long int' [-Wformat=] #define KERN_SOH "\001" /* ASCII Start Of Header */ ^~~~~~ ../include/linux/kern_levels.h:11:18: note: in expansion of macro 'KERN_SOH' #define KERN_ERR KERN_SOH "3" /* error conditions */ ^~~~~~~~ ../include/linux/printk.h:308:9: note: in expansion of macro 'KERN_ERR' printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~ ../arch/arc/plat-eznps/mtm.c:166:3: note: in expansion of macro 'pr_err' pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ^~~~~~ ../arch/arc/plat-eznps/mtm.c:166:40: note: format string is defined here pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", ~^ %ld The hs_ctr variable can just be int instead of long, so also change kstrtol() to kstrtoint() and leave the format string as %d. Also add 2 header files since they are used in mtm.c and we prefer not to depend on accidental/indirect #includes. Cc: linux-snps-arc@lists.infradead.org Cc: Ofer Levi Reviewed-by: Leon Romanovsky Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/plat-eznps/mtm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/plat-eznps/mtm.c b/arch/arc/plat-eznps/mtm.c index 2388de3d09ef..ed0077ef666e 100644 --- a/arch/arc/plat-eznps/mtm.c +++ b/arch/arc/plat-eznps/mtm.c @@ -15,6 +15,8 @@ */ #include +#include +#include #include #include #include @@ -157,10 +159,10 @@ void mtm_enable_core(unsigned int cpu) /* Verify and set the value of the mtm hs counter */ static int __init set_mtm_hs_ctr(char *ctr_str) { - long hs_ctr; + int hs_ctr; int ret; - ret = kstrtol(ctr_str, 0, &hs_ctr); + ret = kstrtoint(ctr_str, 0, &hs_ctr); if (ret || hs_ctr > MT_HS_CNT_MAX || hs_ctr < MT_HS_CNT_MIN) { pr_err("** Invalid @nps_mtm_hs_ctr [%d] needs to be [%d:%d] (incl)\n", -- cgit v1.2.3 From 2423665ec53f2a29191b35382075e9834288a975 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: fix build errors in arc/include/asm/delay.h Fix build errors in arch/arc/'s delay.h: - add "extern unsigned long loops_per_jiffy;" - add for "u64" In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:61:12: error: 'u64' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~ In file included from ../drivers/infiniband/hw/cxgb3/cxio_hal.c:32: ../arch/arc/include/asm/delay.h: In function '__udelay': ../arch/arc/include/asm/delay.h:63:37: error: 'loops_per_jiffy' undeclared (first use in this function) loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32; ^~~~~~~~~~~~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/include/asm/delay.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index d5da2115d78a..03d6bb0f4e13 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -17,8 +17,11 @@ #ifndef __ASM_ARC_UDELAY_H #define __ASM_ARC_UDELAY_H +#include #include /* HZ */ +extern unsigned long loops_per_jiffy; + static inline void __delay(unsigned long loops) { __asm__ __volatile__( -- cgit v1.2.3 From ec837d620c750c0d4996a907c8c4f7febe1bbeee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Jul 2018 20:16:35 -0700 Subject: arc: fix type warnings in arc/mm/cache.c Fix type warnings in arch/arc/mm/cache.c. ../arch/arc/mm/cache.c: In function 'flush_anon_page': ../arch/arc/mm/cache.c:1062:55: warning: passing argument 2 of '__flush_dcache_page' makes integer from pointer without a cast [-Wint-conversion] __flush_dcache_page((phys_addr_t)page_address(page), page_address(page)); ^~~~~~~~~~~~~~~~~~ ../arch/arc/mm/cache.c:1013:59: note: expected 'long unsigned int' but argument is of type 'void *' void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) ~~~~~~~~~~~~~~^~~~~ Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Elad Kanfi Cc: Leon Romanovsky Cc: Ofer Levi Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index b95365e1253a..25c631942500 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1038,7 +1038,7 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, unsigned long pfn) { - unsigned int paddr = pfn << PAGE_SHIFT; + phys_addr_t paddr = pfn << PAGE_SHIFT; u_vaddr &= PAGE_MASK; @@ -1058,8 +1058,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long u_vaddr) { /* TBD: do we really need to clear the kernel mapping */ - __flush_dcache_page(page_address(page), u_vaddr); - __flush_dcache_page(page_address(page), page_address(page)); + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); } -- cgit v1.2.3 From 6f57ed681ed817a4ec444e83f3aa2ad695d5ef34 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 Jun 2018 10:11:10 -0400 Subject: sparc/time: Add missing __init to init_tick_ops() Code that was added to force gcc not to inline any function that isn't explicitly declared as inline uncovered that init_tick_ops() isn't marked as "__init". It is only called by __init functions and more importantly it too calls an __init function which would require it to be __init as well. Link: http://lkml.kernel.org/r/201806060444.hdHcKOBy%fengguang.wu@intel.com Reported-by: kbuild test robot Signed-off-by: Steven Rostedt (VMware) Signed-off-by: David S. Miller --- arch/sparc/kernel/time_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 2ef8cfa9677e..f0eba72aa1ad 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -814,7 +814,7 @@ static void __init get_tick_patch(void) } } -static void init_tick_ops(struct sparc64_tick_ops *ops) +static void __init init_tick_ops(struct sparc64_tick_ops *ops) { unsigned long freq, quotient, tick; -- cgit v1.2.3 From f0afc6b18d3953fb96f836e4d1483eb9855a36b0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 24 Jul 2018 13:53:04 +0200 Subject: sparc: move MSI related definitions to where they are used The definitions in arch/sparc/include/asm/msi.h are only used in arch/sparc/mm/srmmu.c, so it makes sense to have them in the C file directly. In addition, having a custom arch/sparc/include/asm/msi.h prevents from using the asm-generic version of this header, which is necessary to be able to include when CONFIG_GENERIC_MSI_IRQ_DOMAIN is enabled. Signed-off-by: Thomas Petazzoni Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/include/asm/msi.h | 32 -------------------------------- arch/sparc/mm/srmmu.c | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 33 deletions(-) delete mode 100644 arch/sparc/include/asm/msi.h (limited to 'arch') diff --git a/arch/sparc/include/asm/msi.h b/arch/sparc/include/asm/msi.h deleted file mode 100644 index 3c17c1074431..000000000000 --- a/arch/sparc/include/asm/msi.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * msi.h: Defines specific to the MBus - Sbus - Interface. - * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) - */ - -#ifndef _SPARC_MSI_H -#define _SPARC_MSI_H - -/* - * Locations of MSI Registers. - */ -#define MSI_MBUS_ARBEN 0xe0001008 /* MBus Arbiter Enable register */ - -/* - * Useful bits in the MSI Registers. - */ -#define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */ - - -static inline void msi_set_sync(void) -{ - __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t" - "andn %%g3, %2, %%g3\n\t" - "sta %%g3, [%0] %1\n\t" : : - "r" (MSI_MBUS_ARBEN), - "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3"); -} - -#endif /* !(_SPARC_MSI_H) */ diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 1d70c3f6d986..be9cb0065179 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -116,6 +115,25 @@ static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp) set_pte((pte_t *)ctxp, pte); } +/* + * Locations of MSI Registers. + */ +#define MSI_MBUS_ARBEN 0xe0001008 /* MBus Arbiter Enable register */ + +/* + * Useful bits in the MSI Registers. + */ +#define MSI_ASYNC_MODE 0x80000000 /* Operate the MSI asynchronously */ + +static void msi_set_sync(void) +{ + __asm__ __volatile__ ("lda [%0] %1, %%g3\n\t" + "andn %%g3, %2, %%g3\n\t" + "sta %%g3, [%0] %1\n\t" : : + "r" (MSI_MBUS_ARBEN), + "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3"); +} + void pmd_set(pmd_t *pmdp, pte_t *ptep) { unsigned long ptp; /* Physical address, shifted right by 4 */ -- cgit v1.2.3 From 12be1036c536f849ad6f9bba73cffa708aa965c3 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 24 Jul 2018 13:53:05 +0200 Subject: sparc: use asm-generic version of msi.h This is necessary to be able to include when CONFIG_GENERIC_MSI_IRQ_DOMAIN is enabled. Without this, a build with CONFIG_GENERIC_MSI_IRQ_DOMAIN fails with: In file included from drivers//ata/ahci.c:45:0: >> include/linux/msi.h:226:10: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:230:9: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:239:12: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? msi_alloc_info_t *arg); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:240:22: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? void (*msi_finish)(msi_alloc_info_t *arg, int retval); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:241:20: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? void (*set_desc)(msi_alloc_info_t *arg, ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:316:18: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? int nvec, msi_alloc_info_t *args); ^~~~~~~~~~~~~~~~ sg_alloc_fn include/linux/msi.h:318:29: error: unknown type name 'msi_alloc_info_t'; did you mean 'sg_alloc_fn'? int virq, int nvec, msi_alloc_info_t *args); ^~~~~~~~~~~~~~~~ sg_alloc_fn Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller --- arch/sparc/include/asm/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index ac67828da201..410b263ef5c8 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h +generic-y += msi.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h -- cgit v1.2.3 From 156c8b58ef5cfd97245928c95669fd4cb0f9c388 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Jul 2018 08:28:08 -0400 Subject: perf/x86/intel/uncore: Fix hardcoded index of Broadwell extra PCI devices Masayoshi Mizuma reported that a warning message is shown while a CPU is hot-removed on Broadwell servers: WARNING: CPU: 126 PID: 6 at arch/x86/events/intel/uncore.c:988 uncore_pci_remove+0x10b/0x150 Call Trace: pci_device_remove+0x42/0xd0 device_release_driver_internal+0x148/0x220 pci_stop_bus_device+0x76/0xa0 pci_stop_root_bus+0x44/0x60 acpi_pci_root_remove+0x1f/0x80 acpi_bus_trim+0x57/0x90 acpi_bus_trim+0x2e/0x90 acpi_device_hotplug+0x2bc/0x4b0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x174/0x3a0 worker_thread+0x4c/0x3d0 kthread+0xf8/0x130 This bug was introduced by: commit 15a3e845b01c ("perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs") The index of "QPI Port 2 filter" was hardcode to 2, but this conflicts with the index of "PCU.3" which is "HSWEP_PCI_PCU_3", which equals to 2 as well. To fix the conflict, the hardcoded index needs to be cleaned up: - introduce a new enumerator "BDX_PCI_QPI_PORT2_FILTER" for "QPI Port 2 filter" on Broadwell, - increase UNCORE_EXTRA_PCI_DEV_MAX by one, - clean up the hardcoded index. Debugged-by: Masayoshi Mizuma Suggested-by: Ingo Molnar Reported-by: Masayoshi Mizuma Tested-by: Masayoshi Mizuma Signed-off-by: Kan Liang Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: msys.mizuma@gmail.com Cc: stable@vger.kernel.org Fixes: 15a3e845b01c ("perf/x86/intel/uncore: Fix SBOX support for Broadwell CPUs") Link: http://lkml.kernel.org/r/1532953688-15008-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.h | 2 +- arch/x86/events/intel/uncore_snbep.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index c9e1e0bef3c3..e17ab885b1e9 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -28,7 +28,7 @@ #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff -#define UNCORE_EXTRA_PCI_DEV_MAX 3 +#define UNCORE_EXTRA_PCI_DEV_MAX 4 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 87dc0263a2e1..51d7c117e3c7 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void) enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, + BDX_PCI_QPI_PORT2_FILTER, HSWEP_PCI_PCU_3, }; @@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { }, { /* QPI Port 0 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT0_FILTER), }, { /* QPI Port 1 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + SNBEP_PCI_QPI_PORT1_FILTER), }, { /* QPI Port 2 filter */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + BDX_PCI_QPI_PORT2_FILTER), }, { /* PCU.3 (for Capability registers) */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), -- cgit v1.2.3 From c7513c2a2714204d3588ecaa170ae628fd0d217e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 27 Jul 2018 14:59:15 +0200 Subject: crypto/arm64: aes-ce-gcm - add missing kernel_neon_begin/end pair Calling pmull_gcm_encrypt_block() requires kernel_neon_begin() and kernel_neon_end() to be used since the routine touches the NEON register file. Add the missing calls. Also, since NEON register contents are not preserved outside of a kernel mode NEON region, pass the key schedule array again. Fixes: 7c50136a8aba ("crypto: arm64/aes-ghash - yield NEON after every ...") Acked-by: Herbert Xu Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/crypto/ghash-ce-glue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 7cf0b1aa6ea8..8a10f1d7199a 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -488,9 +488,13 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes) - pmull_gcm_encrypt_block(iv, iv, NULL, + if (walk.nbytes) { + kernel_neon_begin(); + pmull_gcm_encrypt_block(iv, iv, ctx->aes_key.key_enc, num_rounds(&ctx->aes_key)); + kernel_neon_end(); + } + } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); -- cgit v1.2.3 From 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76 Mon Sep 17 00:00:00 2001 From: Hari Vyas Date: Tue, 3 Jul 2018 14:35:41 +0530 Subject: PCI: Fix is_added/is_busmaster race condition When a PCI device is detected, pdev->is_added is set to 1 and proc and sysfs entries are created. When the device is removed, pdev->is_added is checked for one and then device is detached with clearing of proc and sys entries and at end, pdev->is_added is set to 0. is_added and is_busmaster are bit fields in pci_dev structure sharing same memory location. A strange issue was observed with multiple removal and rescan of a PCIe NVMe device using sysfs commands where is_added flag was observed as zero instead of one while removing device and proc,sys entries are not cleared. This causes issue in later device addition with warning message "proc_dir_entry" already registered. Debugging revealed a race condition between the PCI core setting the is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue setting the is_busmaster bit in pci_set_master(). As these fields are not handled atomically, that clears the is_added bit. Move the is_added bit to a separate private flag variable and use atomic functions to set and retrieve the device addition state. This avoids the race because is_added no longer shares a memory location with is_busmaster. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283 Signed-off-by: Hari Vyas Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Acked-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 4 +++- arch/powerpc/platforms/powernv/pci-ioda.c | 3 ++- arch/powerpc/platforms/pseries/setup.c | 3 ++- drivers/pci/bus.c | 6 +++--- drivers/pci/hotplug/acpiphp_glue.c | 2 +- drivers/pci/pci.h | 11 +++++++++++ drivers/pci/probe.c | 4 ++-- drivers/pci/remove.c | 5 +++-- include/linux/pci.h | 1 - 9 files changed, 27 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index fe9733ffffaa..471aac313b89 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -42,6 +42,8 @@ #include #include +#include "../../../drivers/pci/pci.h" + /* hose_spinlock protects accesses to the the phb_bitmap. */ static DEFINE_SPINLOCK(hose_spinlock); LIST_HEAD(hose_list); @@ -1014,7 +1016,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus) /* Cardbus can call us to add new devices to a bus, so ignore * those who are already fully discovered */ - if (dev->is_added) + if (pci_dev_is_added(dev)) continue; pcibios_setup_device(dev); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bd0eb6681bc..70b2e1e0f23c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -46,6 +46,7 @@ #include "powernv.h" #include "pci.h" +#include "../../../../drivers/pci/pci.h" #define PNV_IODA1_M64_NUM 16 /* Number of M64 BARs */ #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ @@ -3138,7 +3139,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; pdn = pci_get_pdn(pdev); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 139f0af6c3d9..8a4868a3964b 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -71,6 +71,7 @@ #include #include "pseries.h" +#include "../../../../drivers/pci/pci.h" int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) const int *indexes; struct device_node *dn = pci_device_to_OF_node(pdev); - if (!pdev->is_physfn || pdev->is_added) + if (!pdev->is_physfn || pci_dev_is_added(pdev)) return; /*Firmware must support open sriov otherwise dont configure*/ indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 35b7fc87eac5..5cb40b2518f9 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -330,7 +330,7 @@ void pci_bus_add_device(struct pci_dev *dev) return; } - dev->is_added = 1; + pci_dev_assign_added(dev, true); } EXPORT_SYMBOL_GPL(pci_bus_add_device); @@ -347,14 +347,14 @@ void pci_bus_add_devices(const struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip already-added devices */ - if (dev->is_added) + if (pci_dev_is_added(dev)) continue; pci_bus_add_device(dev); } list_for_each_entry(dev, &bus->devices, bus_list) { /* Skip if device attach failed */ - if (!dev->is_added) + if (!pci_dev_is_added(dev)) continue; child = dev->subordinate; if (child) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 3a17b290df5d..ef0b1b6ba86f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -509,7 +509,7 @@ static void enable_slot(struct acpiphp_slot *slot) list_for_each_entry(dev, &bus->devices, bus_list) { /* Assume that newly added devices are powered on already. */ - if (!dev->is_added) + if (!pci_dev_is_added(dev)) dev->current_state = PCI_D0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 882f1f9596df..08817253c8a2 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -288,6 +288,7 @@ struct pci_sriov { /* pci_dev priv_flags */ #define PCI_DEV_DISCONNECTED 0 +#define PCI_DEV_ADDED 1 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { @@ -300,6 +301,16 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags); } +static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) +{ + assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added); +} + +static inline bool pci_dev_is_added(const struct pci_dev *dev) +{ + return test_bit(PCI_DEV_ADDED, &dev->priv_flags); +} + #ifdef CONFIG_PCI_ATS void pci_restore_ats_state(struct pci_dev *dev); #else diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ac876e32de4b..611adcd9c169 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2433,13 +2433,13 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) dev = pci_scan_single_device(bus, devfn); if (!dev) return 0; - if (!dev->is_added) + if (!pci_dev_is_added(dev)) nr++; for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) { dev = pci_scan_single_device(bus, devfn + fn); if (dev) { - if (!dev->is_added) + if (!pci_dev_is_added(dev)) nr++; dev->multifunction = 1; } diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 6f072eae4f7a..5e3d0dced2b8 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -19,11 +19,12 @@ static void pci_stop_dev(struct pci_dev *dev) { pci_pme_active(dev, false); - if (dev->is_added) { + if (pci_dev_is_added(dev)) { device_release_driver(&dev->dev); pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); - dev->is_added = 0; + + pci_dev_assign_added(dev, false); } if (dev->bus->self) diff --git a/include/linux/pci.h b/include/linux/pci.h index abd5d5e17aee..c133ccfa002e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -368,7 +368,6 @@ struct pci_dev { unsigned int transparent:1; /* Subtractive decode bridge */ unsigned int multifunction:1; /* Multi-function device */ - unsigned int is_added:1; unsigned int is_busmaster:1; /* Is busmaster */ unsigned int no_msi:1; /* May not use MSI */ unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ -- cgit v1.2.3 From cca19f0b684f4ed6aabf6ad07ae3e15e77bfd78a Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 31 Jul 2018 15:24:52 +0200 Subject: powerpc/64s/radix: Fix missing global invalidations when removing copro With the optimizations for TLB invalidation from commit 0cef77c7798a ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask"), the scope of a TLBI (global vs. local) can now be influenced by the value of the 'copros' counter of the memory context. When calling mm_context_remove_copro(), the 'copros' counter is decremented first before flushing. It may have the unintended side effect of sending local TLBIs when we explicitly need global invalidations in this case. Thus breaking any nMMU user in a bad and unpredictable way. Fix it by flushing first, before updating the 'copros' counter, so that invalidations will be global. Fixes: 0cef77c7798a ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Frederic Barrat Reviewed-by: Nicholas Piggin Tested-by: Vaibhav Jain Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 79d570cbf332..b2f89b621b15 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -143,24 +143,33 @@ static inline void mm_context_remove_copro(struct mm_struct *mm) { int c; - c = atomic_dec_if_positive(&mm->context.copros); - - /* Detect imbalance between add and remove */ - WARN_ON(c < 0); - /* - * Need to broadcast a global flush of the full mm before - * decrementing active_cpus count, as the next TLBI may be - * local and the nMMU and/or PSL need to be cleaned up. - * Should be rare enough so that it's acceptable. + * When removing the last copro, we need to broadcast a global + * flush of the full mm, as the next TLBI may be local and the + * nMMU and/or PSL need to be cleaned up. + * + * Both the 'copros' and 'active_cpus' counts are looked at in + * flush_all_mm() to determine the scope (local/global) of the + * TLBIs, so we need to flush first before decrementing + * 'copros'. If this API is used by several callers for the + * same context, it can lead to over-flushing. It's hopefully + * not common enough to be a problem. * * Skip on hash, as we don't know how to do the proper flush * for the time being. Invalidations will remain global if - * used on hash. + * used on hash. Note that we can't drop 'copros' either, as + * it could make some invalidations local with no flush + * in-between. */ - if (c == 0 && radix_enabled()) { + if (radix_enabled()) { flush_all_mm(mm); - dec_mm_active_cpus(mm); + + c = atomic_dec_if_positive(&mm->context.copros); + /* Detect imbalance between add and remove */ + WARN_ON(c < 0); + + if (c == 0) + dec_mm_active_cpus(mm); } } #else -- cgit v1.2.3 From ebad825cdd4e6b327eaf0dd72439408957049cea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Aug 2018 09:57:50 -0700 Subject: ia64: mark special ia64 memory areas anonymous Commit bfd40eaff5ab ("mm: fix vma_is_anonymous() false-positives") made newly allocated vma's have a dummy vm_ops field so that they wouldn't be mistaken for anonymous mappings, and if you wanted an anonymous vma you had to explicitly say so by calling "vma_set_anonymous()" on it. However, it missed the two special vmas that ia64 processes have: the register backing store and the NaT page. So they wouldn't actually act like anonymous ranges, and page faults on them caused a SIGBUS rather than the creation of a new anon page in them. That obviously will make any ia64 binary very unhappy indeed, and the boot fails early. Fixes: bfd40eaff5ab ("mm: fix vma_is_anonymous() false-positives") Reported-by: Tony Luck Cc: Kirill Shutemov Cc: Andrew Morton Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: John Stultz Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index e6c6dfd98de2..3b85c3ecac38 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -116,6 +116,7 @@ ia64_init_addr_space (void) */ vma = vm_area_alloc(current->mm); if (vma) { + vma_set_anonymous(vma); vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; @@ -133,6 +134,7 @@ ia64_init_addr_space (void) if (!(current->personality & MMAP_PAGE_ZERO)) { vma = vm_area_alloc(current->mm); if (vma) { + vma_set_anonymous(vma); vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | -- cgit v1.2.3 From 8b11ec1b5ffb54f71cb5a5e5c8c4d36e5d113085 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Aug 2018 13:43:38 -0700 Subject: mm: do not initialize TLB stack vma's with vma_init() Commit 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and data segments") tried to initialize various left-over ad-hoc vma's "properly", but actually made things worse for the temporary vma's used for TLB flushing. vma_init() doesn't actually initialize all of the vma, just a few fields, so doing something like - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + struct vm_area_struct vma; + + vma_init(&vma, tlb->mm); was actually very bad: instead of having a nicely initialized vma with every field but "vm_mm" zeroed, you'd have an entirely uninitialized vma with only a couple of fields initialized. And they weren't even fields that the code in question mostly cared about. The flush_tlb_range() function takes a "struct vma" rather than a "struct mm_struct", because a few architectures actually care about what kind of range it is - being able to only do an ITLB flush if it's a range that doesn't have data accesses enabled, for example. And all the normal users already have the vma for doing the range invalidation. But a few people want to call flush_tlb_range() with a range they just made up, so they also end up using a made-up vma. x86 just has a special "flush_tlb_mm_range()" function for this, but other architectures (arm and ia64) do the "use fake vma" thing instead, and thus got caught up in the vma_init() changes. At the same time, the TLB flushing code really doesn't care about most other fields in the vma, so vma_init() is just unnecessary and pointless. This fixes things by having an explicit "this is just an initializer for the TLB flush" initializer macro, which is used by the arm/arm64/ia64 people who mis-use this interface with just a dummy vma. Fixes: 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and data segments") Cc: Dmitry Vyukov Cc: Oleg Nesterov Cc: Andrea Arcangeli Cc: Kirill Shutemov Cc: Andrew Morton Cc: John Stultz Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- arch/arm/mach-rpc/ecard.c | 5 +---- arch/arm64/include/asm/tlb.h | 4 +--- arch/arm64/mm/hugetlbpage.c | 10 ++++------ arch/ia64/include/asm/tlb.h | 7 +++---- include/linux/mm.h | 3 +++ 5 files changed, 12 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index 8db62cc54a6a..04b2f22c2739 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -212,7 +212,7 @@ static DEFINE_MUTEX(ecard_mutex); */ static void ecard_init_pgtables(struct mm_struct *mm) { - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC); /* We want to set up the page tables for the following mapping: * Virtual Physical @@ -237,9 +237,6 @@ static void ecard_init_pgtables(struct mm_struct *mm) memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE)); - vma_init(&vma, mm); - vma.vm_flags = VM_EXEC; - flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE); flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE); } diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index d87f2d646caa..0ad1cf233470 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,9 +37,7 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - struct vm_area_struct vma; - - vma_init(&vma, tlb->mm); + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); /* * The ASID allocator will either invalidate the ASID or mark diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 1854e49aa18a..192b3ba07075 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -108,13 +108,10 @@ static pte_t get_clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma; pte_t orig_pte = huge_ptep_get(ptep); bool valid = pte_valid(orig_pte); unsigned long i, saddr = addr; - vma_init(&vma, mm); - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { pte_t pte = ptep_get_and_clear(mm, addr, ptep); @@ -127,8 +124,10 @@ static pte_t get_clear_flush(struct mm_struct *mm, orig_pte = pte_mkdirty(orig_pte); } - if (valid) + if (valid) { + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); flush_tlb_range(&vma, saddr, addr); + } return orig_pte; } @@ -147,10 +146,9 @@ static void clear_flush(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); unsigned long i, saddr = addr; - vma_init(&vma, mm); for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) pte_clear(mm, addr, ptep); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index db89e7306081..516355a774bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -115,12 +115,11 @@ ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned flush_tlb_all(); } else { /* - * XXX fix me: flush_tlb_range() should take an mm pointer instead of a - * vma pointer. + * flush_tlb_range() takes a vma instead of a mm pointer because + * some architectures want the vm_flags for ITLB/DTLB flush. */ - struct vm_area_struct vma; + struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); - vma_init(&vma, tlb->mm); /* flush the address range from the tlb: */ flush_tlb_range(&vma, start, end); /* now flush the virt. page-table area mapping the address range: */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ba6d356d18f..68a5121694ef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -466,6 +466,9 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } +/* flush_tlb_range() takes a vma, not a mm, and can care about flags */ +#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) } + struct mmu_gather; struct inode; -- cgit v1.2.3 From 1b3a62643660020cdc68e6139a010c06e8fc96c7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 1 Aug 2018 16:32:25 +0300 Subject: x86/boot/compressed/64: Validate trampoline placement against E820 There were two report of boot failure cased by trampoline placed into a reserved memory region. It can happen on machines that don't report EBDA correctly. Fix the problem by re-validating the found address against the E820 table. If the address is in a reserved area, find the next usable region below the initial address. Fixes: 3548e131ec6a ("x86/boot/compressed/64: Find a place for 32-bit trampoline") Reported-by: Dmitry Malkin Reported-by: youling 257 Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180801133225.38121-1-kirill.shutemov@linux.intel.com --- arch/x86/boot/compressed/pgtable_64.c | 73 ++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 8c5107545251..9e2157371491 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,3 +1,4 @@ +#include #include #include "pgtable.h" #include "../string.h" @@ -34,10 +35,62 @@ unsigned long *trampoline_32bit __section(.data); extern struct boot_params *boot_params; int cmdline_find_option_bool(const char *option); +static unsigned long find_trampoline_placement(void) +{ + unsigned long bios_start, ebda_start; + unsigned long trampoline_start; + struct boot_e820_entry *entry; + int i; + + /* + * Find a suitable spot for the trampoline. + * This code is based on reserve_bios_regions(). + */ + + ebda_start = *(unsigned short *)0x40e << 4; + bios_start = *(unsigned short *)0x413 << 10; + + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; + + if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; + + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Find the first usable memory region under bios_start. */ + for (i = boot_params->e820_entries - 1; i >= 0; i--) { + entry = &boot_params->e820_table[i]; + + /* Skip all entries above bios_start. */ + if (bios_start <= entry->addr) + continue; + + /* Skip non-RAM entries. */ + if (entry->type != E820_TYPE_RAM) + continue; + + /* Adjust bios_start to the end of the entry if needed. */ + if (bios_start > entry->addr + entry->size) + bios_start = entry->addr + entry->size; + + /* Keep bios_start page-aligned. */ + bios_start = round_down(bios_start, PAGE_SIZE); + + /* Skip the entry if it's too small. */ + if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr) + continue; + + break; + } + + /* Place the trampoline just below the end of low memory */ + return bios_start - TRAMPOLINE_32BIT_SIZE; +} + struct paging_config paging_prepare(void *rmode) { struct paging_config paging_config = {}; - unsigned long bios_start, ebda_start; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ boot_params = rmode; @@ -61,23 +114,7 @@ struct paging_config paging_prepare(void *rmode) paging_config.l5_required = 1; } - /* - * Find a suitable spot for the trampoline. - * This code is based on reserve_bios_regions(). - */ - - ebda_start = *(unsigned short *)0x40e << 4; - bios_start = *(unsigned short *)0x413 << 10; - - if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) - bios_start = BIOS_START_MAX; - - if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) - bios_start = ebda_start; - - /* Place the trampoline just below the end of low memory, aligned to 4k */ - paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE; - paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE); + paging_config.trampoline_start = find_trampoline_placement(); trampoline_32bit = (unsigned long *)paging_config.trampoline_start; -- cgit v1.2.3