From 032014bc04ae97fd0474f311259db7d669e47507 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 12 Dec 2014 10:44:20 +0100 Subject: s390/vtime: Get rid of redundant WARN_ON in the cpu time accounting function vtime_account_irq_enter (vtime_account_system) we use a WARN_ON_ONCE(!irqs_disabled()). This is redundant as the function virt_timer_forward is always called and has a BUG_ON(!irqs_disabled()). This saves several nanoseconds in my specific testcase (KVM entry/exit) and probably all other callers like (soft)irq entry/exit. Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vtime.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 7f0089d9a4aa..e34122e539a1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -128,8 +128,6 @@ void vtime_account_irq_enter(struct task_struct *tsk) struct thread_info *ti = task_thread_info(tsk); u64 timer, system; - WARN_ON_ONCE(!irqs_disabled()); - timer = S390_lowcore.last_update_timer; S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; -- cgit v1.2.3 From 81fc77fbfc1553aee35e33af89959be1d8c81db2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 16 Dec 2014 10:25:37 +0100 Subject: s390/kernel: use stnsm 255 instead of stosm 0 On some models, stnsm 255 might be slightly faster than stosm 0. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irqflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h index 37b9091ab8c0..16aa0c779e07 100644 --- a/arch/s390/include/asm/irqflags.h +++ b/arch/s390/include/asm/irqflags.h @@ -36,7 +36,7 @@ static inline notrace void __arch_local_irq_ssm(unsigned long flags) static inline notrace unsigned long arch_local_save_flags(void) { - return __arch_local_irq_stosm(0x00); + return __arch_local_irq_stnsm(0xff); } static inline notrace unsigned long arch_local_irq_save(void) -- cgit v1.2.3 From 98590460d44f4dc2020e594c5ac3357c8278f45b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 18 Dec 2014 10:04:21 +0100 Subject: s390: wire up execveat syscall Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 3 ++- arch/s390/kernel/syscalls.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 2b446cf0cc65..67878af257a0 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -289,7 +289,8 @@ #define __NR_bpf 351 #define __NR_s390_pci_mmio_write 352 #define __NR_s390_pci_mmio_read 353 -#define NR_syscalls 354 +#define __NR_execveat 354 +#define NR_syscalls 355 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index a2987243bc76..939ec474b1dd 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -362,3 +362,4 @@ SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */ SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf) SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write) SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read) 
+SYSCALL(sys_execveat,sys_execveat,compat_sys_execveat) -- cgit v1.2.3 From 394f56fe480140877304d342dec46d50dc823d46 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 19 Dec 2014 16:04:11 -0800 Subject: x86_64, vdso: Fix the vdso address randomization algorithm The theory behind vdso randomization is that it's mapped at a random offset above the top of the stack. To avoid wasting a page of memory for an extra page table, the vdso isn't supposed to extend past the lowest PMD into which it can fit. Other than that, the address should be a uniformly distributed address that meets all of the alignment requirements. The current algorithm is buggy: the vdso has about a 50% probability of being at the very end of a PMD. The current algorithm also has a decent chance of failing outright due to incorrect handling of the case where the top of the stack is near the top of its PMD. This fixes the implementation. The paxtest estimate of vdso "randomisation" improves from 11 bits to 18 bits. (Disclaimer: I don't know what the paxtest code is actually calculating.) It's worth noting that this algorithm is inherently biased: the vdso is more likely to end up near the end of its PMD than near the beginning. Ideally we would either nix the PMD sharing requirement or jointly randomize the vdso and the stack to reduce the bias. In the mean time, this is a considerable improvement with basically no risk of compatibility issues, since the allowed outputs of the algorithm are unchanged. As an easy test, doing this: for i in `seq 10000` do grep -P vdso /proc/self/maps |cut -d- -f1 done |sort |uniq -d used to produce lots of output (1445 lines on my most recent run). A tiny subset looks like this: 7fffdfffe000 7fffe01fe000 7fffe05fe000 7fffe07fe000 7fffe09fe000 7fffe0bfe000 7fffe0dfe000 Note the suspicious fe000 endings. With the fix, I get a much more palatable 76 repeated addresses. Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski --- arch/x86/vdso/vma.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 009495b9ab4b..1c9f750c3859 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -41,12 +41,17 @@ void __init init_vdso_image(const struct vdso_image *image) struct linux_binprm; -/* Put the vdso above the (randomized) stack with another randomized offset. - This way there is no hole in the middle of address space. - To save memory make sure it is still in the same PTE as the stack top. - This doesn't give that many random bits. - - Only used for the 64-bit and x32 vdsos. */ +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ static unsigned long vdso_addr(unsigned long start, unsigned len) { #ifdef CONFIG_X86_32 @@ -54,22 +59,30 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) #else unsigned long addr, end; unsigned offset; - end = (start + PMD_SIZE - 1) & PMD_MASK; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. 
+ */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; if (end >= TASK_SIZE_MAX) end = TASK_SIZE_MAX; end -= len; - /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); - addr = start + (offset << PAGE_SHIFT); - if (addr >= end) - addr = end; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } /* - * page-align it here so that get_unmapped_area doesn't - * align it wrongfully again to the next page. addr can come in 4K - * unaligned here as a result of stack start randomization. + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. */ - addr = PAGE_ALIGN(addr); addr = align_vdso_addr(addr); return addr; -- cgit v1.2.3 From 8b8cd8a3673dd766428aee8222fa8fd3fdd26d58 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 22 Dec 2014 13:33:10 -0500 Subject: x86/xen: Remove unnecessary BUG_ON(preemptible()) in xen_setup_timer() There is no reason for having it and, with commit 250a1ac685f1 ("x86, smpboot: Remove pointless preempt_disable() in native_smp_prepare_cpus()"), it prevents HVM guests from booting. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/time.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index f473d268d387..23019b483908 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -458,8 +458,6 @@ void xen_setup_timer(int cpu) void xen_setup_cpu_clockevents(void) { - BUG_ON(preemptible()); - clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt)); } -- cgit v1.2.3 From 132978b94e66f8ad7d20790f8332f0e9c1426029 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 19 Dec 2014 16:10:54 +0000 Subject: x86: Fix step size adjustment during initial memory mapping The old scheme can lead to failure in certain cases - the problem is that after bumping step_size the next (non-final) iteration is only guaranteed to make available a memory block the size of what step_size was before. E.g. for a memory block [0,3004600000) we'd have: iter start end step amount 1 3004400000 30045fffff 2M 2M 2 3004000000 30043fffff 64M 4M 3 3000000000 3003ffffff 2G 64M 4 2000000000 2fffffffff 64G 64G Yet to map 64G with 4k pages (as happens e.g. under PV Xen) we need slightly over 128M, but the first three iterations made only about 70M available. The condition (new_mapped_ram_size > mapped_ram_size) for bumping step_size is just not suitable. Instead we want to bump it when we know we have enough memory available to cover a block of the new step_size. And rather than making that condition more complicated than needed, simply adjust step_size by the largest possible factor we know we can cover at that point - which is shifting it left by one less than the difference between page table level shifts. (Interestingly the original STEP_SIZE_SHIFT definition had a comment hinting at that having been the intention, just that it should have been PUD_SHIFT-PMD_SHIFT-1 instead of (PUD_SHIFT-PMD_SHIFT)/2, and of course for non-PAE 32-bit we can't really use these two constants as they're equal there.) Furthermore the comment in get_new_step_size() didn't get updated when the bottom-down mapping logic got added. 
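[Editor's note: a quick check of the new shift under typical x86-64 constants (an illustrative sketch, not part of the patch):

	/* PAGE_SHIFT = 12, PMD_SHIFT = 21, PUD_SHIFT = 30 on x86-64 */
	step_size << (PMD_SHIFT - PAGE_SHIFT - 1);	/* == step_size << 8 */
	/* equals PUD_SHIFT - PMD_SHIFT - 1 too, matching the original intent */

A mapped block of S bytes can hold 4k PTE tables describing S * 512 bytes (each 4096-byte table maps 2M), so growing the step by only x256 per bump guarantees the previously mapped block always supplies enough page-table memory for the next step.]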
Yet while an overflow (flushing step_size to zero) of the shift doesn't matter for the top-down method, it does for bottom-up because round_up(x, 0) = 0, and an upper range boundary of zero can't really work well. Signed-off-by: Jan Beulich Acked-by: Yinghai Lu Link: http://lkml.kernel.org/r/54945C1E020000780005114E@mail.emea.novell.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a97ee0801475..08a7d313538a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -438,20 +438,20 @@ static unsigned long __init init_range_memory_mapping( static unsigned long __init get_new_step_size(unsigned long step_size) { /* - * Explain why we shift by 5 and why we don't have to worry about - * 'step_size << 5' overflowing: - * - * initial mapped size is PMD_SIZE (2M). + * Initial mapped size is PMD_SIZE (2M). * We can not set step_size to be PUD_SIZE (1G) yet. * In worse case, when we cross the 1G boundary, and * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k) - * to map 1G range with PTE. Use 5 as shift for now. + * to map 1G range with PTE. Hence we use one less than the + * difference of page table level shifts. * - * Don't need to worry about overflow, on 32bit, when step_size - * is 0, round_down() returns 0 for start, and that turns it - * into 0x100000000ULL. + * Don't need to worry about overflow in the top-down case, on 32bit, + * when step_size is 0, round_down() returns 0 for start, and that + * turns it into 0x100000000ULL. + * In the bottom-up case, round_up(x, 0) returns 0 though too, which + * needs to be taken into consideration by the code below. */ - return step_size << 5; + return step_size << (PMD_SHIFT - PAGE_SHIFT - 1); } /** @@ -471,7 +471,6 @@ static void __init memory_map_top_down(unsigned long map_start, unsigned long step_size; unsigned long addr; unsigned long mapped_ram_size = 0; - unsigned long new_mapped_ram_size; /* xen has big range in reserved near end of ram, skip it at first.*/ addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); @@ -496,14 +495,12 @@ static void __init memory_map_top_down(unsigned long map_start, start = map_start; } else start = map_start; - new_mapped_ram_size = init_range_memory_mapping(start, + mapped_ram_size += init_range_memory_mapping(start, last_start); last_start = start; min_pfn_mapped = last_start >> PAGE_SHIFT; - /* only increase step_size after big range get mapped */ - if (new_mapped_ram_size > mapped_ram_size) + if (mapped_ram_size >= step_size) step_size = get_new_step_size(step_size); - mapped_ram_size += new_mapped_ram_size; } if (real_end < map_end) @@ -524,7 +521,7 @@ static void __init memory_map_top_down(unsigned long map_start, static void __init memory_map_bottom_up(unsigned long map_start, unsigned long map_end) { - unsigned long next, new_mapped_ram_size, start; + unsigned long next, start; unsigned long mapped_ram_size = 0; /* step_size need to be small so pgt_buf from BRK could cover it */ unsigned long step_size = PMD_SIZE; @@ -539,19 +536,19 @@ static void __init memory_map_bottom_up(unsigned long map_start, * for page table. 
*/ while (start < map_end) { - if (map_end - start > step_size) { + if (step_size && map_end - start > step_size) { next = round_up(start + 1, step_size); if (next > map_end) next = map_end; - } else + } else { next = map_end; + } - new_mapped_ram_size = init_range_memory_mapping(start, next); + mapped_ram_size += init_range_memory_mapping(start, next); start = next; - if (new_mapped_ram_size > mapped_ram_size) + if (mapped_ram_size >= step_size) step_size = get_new_step_size(step_size); - mapped_ram_size += new_mapped_ram_size; } } -- cgit v1.2.3 From ea174f4c4f6135e30a4e1e8c4511980338238b16 Mon Sep 17 00:00:00 2001 From: Sylvain BERTRAND Date: Tue, 23 Dec 2014 13:39:12 +0100 Subject: x86: Fix mkcapflags.sh bash-ism Choked while compiling Linux with dash shell instead of bash shell. See: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/test.html Signed-off-by: Sylvain BERTRAND Link: http://lkml.kernel.org/r/20141223123912.GA1386@localhost.localdomain Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mkcapflags.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index e2b22df964cd..36d99a337b49 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -28,7 +28,7 @@ function dump_array() # If the /* comment */ starts with a quote string, grab that. VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')" [ -z "$VALUE" ] && VALUE="\"$NAME\"" - [ "$VALUE" == '""' ] && continue + [ "$VALUE" = '""' ] && continue # Name is uppercase, VALUE is all lowercase VALUE="$(echo "$VALUE" | tr A-Z a-z)" -- cgit v1.2.3 From 280dbc572357eb50184663fc9e4aaf09c8141e9b Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Tue, 23 Dec 2014 12:57:43 +0100 Subject: x86/build: Clean auto-generated processor feature files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9def39be4e96 ("x86: Support compiling out human-friendly processor feature names") made two source file targets conditional. Such conditional targets will not be cleaned automatically by make mrproper. Fix by adding explicit clean-files targets for the two files. 
Fixes: 9def39be4e96 ("x86: Support compiling out human-friendly processor feature names") Signed-off-by: Bjørn Mork Cc: Josh Triplett Link: http://lkml.kernel.org/r/1419335863-10608-1-git-send-email-bjorn@mork.no Signed-off-by: Ingo Molnar --- arch/x86/boot/Makefile | 1 + arch/x86/kernel/cpu/Makefile | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 5b016e2498f3..3db07f30636f 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -51,6 +51,7 @@ targets += cpustr.h $(obj)/cpustr.h: $(obj)/mkcpustr FORCE $(call if_changed,cpustr) endif +clean-files += cpustr.h # --------------------------------------------------------------------------- diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index e27b49d7c922..80091ae54c2b 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -66,3 +66,4 @@ targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) endif +clean-files += capflags.c -- cgit v1.2.3 From 1ddf0b1b11aa8a90cef6706e935fc31c75c406ba Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 21 Dec 2014 08:57:46 -0800 Subject: x86, vdso: Use asm volatile in __getcpu In Linux 3.18 and below, GCC hoists the lsl instructions in the pvclock code all the way to the beginning of __vdso_clock_gettime, slowing the non-paravirt case significantly. For unknown reasons, presumably related to the removal of a branch, the performance issue is gone as of e76b027e6408 x86,vdso: Use LSL unconditionally for vgetcpu but I don't trust GCC enough to expect the problem to stay fixed. There should be no correctness issue, because the __getcpu calls in __vdso_vlock_gettime were never necessary in the first place. Note to stable maintainers: In 3.18 and below, depending on configuration, gcc 4.9.2 generates code like this: 9c3: 44 0f 03 e8 lsl %ax,%r13d 9c7: 45 89 eb mov %r13d,%r11d 9ca: 0f 03 d8 lsl %ax,%ebx This patch won't apply as is to any released kernel, but I'll send a trivial backported version if needed. Fixes: 51c19b4f5927 x86: vdso: pvclock gettime support Cc: stable@vger.kernel.org # 3.8+ Cc: Marcelo Tosatti Acked-by: Paolo Bonzini Signed-off-by: Andy Lutomirski --- arch/x86/include/asm/vgtod.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index e7e9682a33e9..f556c4843aa1 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -80,9 +80,11 @@ static inline unsigned int __getcpu(void) /* * Load per CPU data from GDT. LSL is faster than RDTSCP and - * works on all CPUs. + * works on all CPUs. This is volatile so that it orders + * correctly wrt barrier() and to keep gcc from cleverly + * hoisting it out of the calling function. */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); return p; } -- cgit v1.2.3 From 0b1e95b2fa0934c3a08db483979c70d3b287f50e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 30 Dec 2014 22:50:54 +0100 Subject: crypto: aesni - fix "by8" variant for 128 bit keys The "by8" counter mode optimization is broken for 128 bit keys with input data longer than 128 bytes. It uses the wrong key material for en- and decryption. The key registers xkey0, xkey4, xkey8 and xkey12 need to be preserved in case we're handling more than 128 bytes of input data -- they won't get reloaded after the initial load. 
They must therefore be (a) loaded on the first iteration and (b) be preserved for the latter ones. The implementation for 128 bit keys does not comply with (a) nor (b). Fix this by bringing the implementation back to its original source and correctly load the key registers and preserve their values by *not* re-using the registers for other purposes. Kudos to James for reporting the issue and providing a test case showing the discrepancies. Reported-by: James Yonan Cc: Chandramouli Narayanan Cc: # v3.18 Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 46 +++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 2df2a0298f5a..a916c4a61165 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -208,7 +208,7 @@ ddq_add_8: .if (klen == KEY_128) .if (load_keys) - vmovdqa 3*16(p_keys), xkeyA + vmovdqa 3*16(p_keys), xkey4 .endif .else vmovdqa 3*16(p_keys), xkeyA @@ -224,7 +224,7 @@ ddq_add_8: add $(16*by), p_in .if (klen == KEY_128) - vmovdqa 4*16(p_keys), xkey4 + vmovdqa 4*16(p_keys), xkeyB .else .if (load_keys) vmovdqa 4*16(p_keys), xkey4 @@ -234,7 +234,12 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 3 */ + /* key 3 */ + .if (klen == KEY_128) + vaesenc xkey4, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif .set i, (i +1) .endr @@ -243,13 +248,18 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkey4, var_xdata, var_xdata /* key 4 */ + /* key 4 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey4, var_xdata, var_xdata + .endif .set i, (i +1) .endr .if (klen == KEY_128) .if (load_keys) - vmovdqa 6*16(p_keys), xkeyB + vmovdqa 6*16(p_keys), xkey8 .endif .else vmovdqa 6*16(p_keys), xkeyB @@ -267,12 +277,17 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkeyB, var_xdata, var_xdata /* key 6 */ + /* key 6 */ + .if (klen == KEY_128) + vaesenc xkey8, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif .set i, (i +1) .endr .if (klen == KEY_128) - vmovdqa 8*16(p_keys), xkey8 + vmovdqa 8*16(p_keys), xkeyB .else .if (load_keys) vmovdqa 8*16(p_keys), xkey8 @@ -288,7 +303,7 @@ ddq_add_8: .if (klen == KEY_128) .if (load_keys) - vmovdqa 9*16(p_keys), xkeyA + vmovdqa 9*16(p_keys), xkey12 .endif .else vmovdqa 9*16(p_keys), xkeyA @@ -297,7 +312,12 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkey8, var_xdata, var_xdata /* key 8 */ + /* key 8 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey8, var_xdata, var_xdata + .endif .set i, (i +1) .endr @@ -306,7 +326,12 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 9 */ + /* key 9 */ + .if (klen == KEY_128) + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif .set i, (i +1) .endr @@ -412,7 +437,6 @@ ddq_add_8: /* main body of aes ctr load */ .macro do_aes_ctrmain key_len - cmp $16, num_bytes jb .Ldo_return2\key_len -- cgit v1.2.3 From 0b8c960cf6defc56b3aa1a71b5af95872b6dff2b Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 29 Dec 2014 16:20:39 -0800 Subject: crypto: sha-mb - Add avx2_supported check. This patch fixes this allyesconfig target build error with older binutils. 
LD arch/x86/crypto/built-in.o ld: arch/x86/crypto/sha-mb/built-in.o: No such file: No such file or directory Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Vinson Lee Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index fd0f848938cc..5a4a089e8b1f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o -obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o @@ -46,6 +45,7 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o + obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o -- cgit v1.2.3 From d02dc27db0dc74683efc4a2b36f55f5594451f38 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Sun, 4 Jan 2015 18:55:03 +0800 Subject: ACPI / processor: Rename acpi_(un)map_lsapic() to acpi_(un)map_cpu() acpi_map_lsapic() will allocate a logical CPU number and map it to physical CPU id (such as APIC id) for the hot-added CPU, it will also do some mapping for NUMA node id and etc, acpi_unmap_lsapic() will do the reverse. We can see that the name of the function is a little bit confusing and arch (IA64) dependent so rename them as acpi_(un)map_cpu() to make arch agnostic and explicit. Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- arch/ia64/kernel/acpi.c | 9 ++++----- arch/x86/kernel/acpi/boot.c | 9 ++++----- drivers/acpi/acpi_processor.c | 6 +++--- include/linux/acpi.h | 4 ++-- 4 files changed, 13 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 615ef81def49..e795cb848154 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -893,13 +893,13 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } -EXPORT_SYMBOL(acpi_map_lsapic); +EXPORT_SYMBOL(acpi_map_cpu); -int acpi_unmap_lsapic(int cpu) +int acpi_unmap_cpu(int cpu) { ia64_cpu_to_sapicid[cpu] = -1; set_cpu_present(cpu, false); @@ -910,8 +910,7 @@ int acpi_unmap_lsapic(int cpu) return (0); } - -EXPORT_SYMBOL(acpi_unmap_lsapic); +EXPORT_SYMBOL(acpi_unmap_cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_NUMA diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4433a4be8171..d1626364a28a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -750,13 +750,13 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } -EXPORT_SYMBOL(acpi_map_lsapic); +EXPORT_SYMBOL(acpi_map_cpu); -int acpi_unmap_lsapic(int cpu) +int acpi_unmap_cpu(int cpu) { #ifdef CONFIG_ACPI_NUMA 
set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); @@ -768,8 +768,7 @@ int acpi_unmap_lsapic(int cpu) return (0); } - -EXPORT_SYMBOL(acpi_unmap_lsapic); +EXPORT_SYMBOL(acpi_unmap_cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index f02b29eb0fda..1020b1b53a17 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -180,13 +180,13 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) cpu_maps_update_begin(); cpu_hotplug_begin(); - ret = acpi_map_lsapic(pr->handle, pr->phys_id, &pr->id); + ret = acpi_map_cpu(pr->handle, pr->phys_id, &pr->id); if (ret) goto out; ret = arch_register_cpu(pr->id); if (ret) { - acpi_unmap_lsapic(pr->id); + acpi_unmap_cpu(pr->id); goto out; } @@ -461,7 +461,7 @@ static void acpi_processor_remove(struct acpi_device *device) /* Remove the CPU. */ arch_unregister_cpu(pr->id); - acpi_unmap_lsapic(pr->id); + acpi_unmap_cpu(pr->id); cpu_hotplug_done(); cpu_maps_update_done(); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 856d381b1d5b..d459cd17b477 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -147,8 +147,8 @@ void acpi_numa_arch_fixup(void); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu); -int acpi_unmap_lsapic(int cpu); +int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu); +int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); -- cgit v1.2.3 From e38f97813302065fbc9c9eab5c1a94dc021d71e2 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 1 Jan 2015 22:27:32 +0800 Subject: s390/timex: fix get_tod_clock_ext() inline assembly For C language, it treats array parameter as a pointer, so sizeof for an array parameter is equal to sizeof for a pointer, which causes compiler warning (with allmodconfig by gcc 5): ./arch/s390/include/asm/timex.h: In function 'get_tod_clock_ext': ./arch/s390/include/asm/timex.h:76:32: warning: 'sizeof' on array function parameter 'clk' will return size of 'char *' [-Wsizeof-array-argument] typedef struct { char _[sizeof(clk)]; } addrtype; ^ Can use macro CLOCK_STORE_SIZE instead of all related hard code numbers, which also can avoid this warning. And also add a tab to CLOCK_TICK_RATE definition to match coding styles. [heiko.carstens@de.ibm.com]: Chen's patch actually fixes a bug within the get_tod_clock_ext() inline assembly where we incorrectly tell the compiler that only 8 bytes of memory get changed instead of 16 bytes. This would allow gcc to generate incorrect code. Right now this doesn't seem to be the case. Also slightly changed the patch a bit. 
- renamed CLOCK_STORE_SIZE to STORE_CLOCK_EXT_SIZE - changed get_tod_clock_ext() to receive a char pointer parameter Signed-off-by: Chen Gang Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_vm.c | 2 +- arch/s390/include/asm/timex.h | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 32040ace00ea..afbe07907c10 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -231,7 +231,7 @@ failed: struct dbfs_d2fc_hdr { u64 len; /* Length of d2fc buffer without header */ u16 version; /* Version of header */ - char tod_ext[16]; /* TOD clock for d2fc */ + char tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */ u64 count; /* Number of VM guests in d2fc buffer */ char reserved[30]; } __attribute__ ((packed)); diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 8beee1cceba4..98eb2a579223 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -67,20 +67,22 @@ static inline void local_tick_enable(unsigned long long comp) set_clock_comparator(S390_lowcore.clock_comparator); } -#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ +#define STORE_CLOCK_EXT_SIZE 16 /* stcke writes 16 bytes */ typedef unsigned long long cycles_t; -static inline void get_tod_clock_ext(char clk[16]) +static inline void get_tod_clock_ext(char *clk) { - typedef struct { char _[sizeof(clk)]; } addrtype; + typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype; asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc"); } static inline unsigned long long get_tod_clock(void) { - unsigned char clk[16]; + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); } -- cgit v1.2.3 From 0f9132ceab112ab75d34865f5aeddf0bac234896 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Jan 2015 12:24:47 +0000 Subject: arm64: Correct __NR_compat_syscalls for bpf Commit 97b56be10352a70c (arm64: compat: Enable bpf syscall) made the usual mistake of forgetting to update __NR_compat_syscalls. Due to this, when el0_sync_compat calls el0_svc_naked, the test against sc_nr (__NR_compat_syscalls) will fail, and we'll call ni_sys, returning -ENOSYS to userspace. This patch bumps __NR_compat_syscalls appropriately, enabling the use of the bpf syscall from compat tasks. Due to the reorganisation of unistd{,32}.h as part of commit f3e5c847ec3d12b4 (arm64: Add __NR_* definitions for compat syscalls) it is not currently possible to include both headers and sanity-check the value of __NR_compat_syscalls at build-time to prevent this from happening again. Additional rework is required to make such niceties a possibility. 
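[Editor's note: the build-time sanity check alluded to above could eventually take a shape like this hypothetical sketch; it cannot compile today for exactly the reason given, since unistd.h and unistd32.h cannot yet be included together:

	/* hypothetical: tie the count to the highest compat syscall number */
	static inline void compat_syscall_count_check(void)
	{
		BUILD_BUG_ON(__NR_compat_syscalls != __NR_bpf + 1);
	}
]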
Cc: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 49c9aefd24a5..b780c6c76eec 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 386 +#define __NR_compat_syscalls 387 #endif #define __ARCH_WANT_SYS_CLONE -- cgit v1.2.3 From 3efcb7a44bb75bd94d889245ba82e2195a7ab0a2 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 5 Jan 2015 14:23:48 +0000 Subject: arm64: Remove unused prepare_to_copy() prepare_to_copy() was removed from all architectures supported at that time in commit 55ccf3fe3f9a ("fork: move the real prepare_to_copy() users to arch_dup_task_struct()"). Remove it from arm64 as well. Signed-off-by: Tobias Klauser Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 286b1bec547c..f131a98e1e64 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -123,9 +123,6 @@ struct task_struct; /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* Prepare to copy thread state - unlazy all lazy status */ -#define prepare_to_copy(tsk) do { } while (0) - unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() -- cgit v1.2.3 From 80639d4a79aac5081d43ed64a0801ef42de3aba3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 10:31:56 +0000 Subject: arm64: sanity checks: add missing AArch32 registers We don't currently check a number of registers exposed to AArch32 guests (MVFR{0,1,2}_EL1 and ID_DFR0_EL1), despite the fact these describe AArch32 feature support exposed to userspace and KVM guests similarly to AArch64 registers which we do check. We do not expect these registers to vary across a set of CPUs. This patch adds said registers to the cpuinfo framework and sanity checks. No sanity check failures have been observed on a current ARMv8 big.LITTLE platform (Juno). Cc: Catalin Marinas Reported-by: Suzuki K. Poulose Signed-off-by: Suzuki K. Poulose Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 5 +++++ arch/arm64/kernel/cpuinfo.c | 10 ++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index ace70682499b..8e797b2fcc01 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -39,6 +39,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; + u32 reg_id_dfr0; u32 reg_id_isar0; u32 reg_id_isar1; u32 reg_id_isar2; @@ -51,6 +52,10 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr3; u32 reg_id_pfr0; u32 reg_id_pfr1; + + u32 reg_mvfr0; + u32 reg_mvfr1; + u32 reg_mvfr2; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 57b641747534..07d435cf2eea 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -147,6 +147,7 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) * If we have AArch32, we care about 32-bit features for compat. 
These * registers should be RES0 otherwise. */ + diff |= CHECK(id_dfr0, boot, cur, cpu); diff |= CHECK(id_isar0, boot, cur, cpu); diff |= CHECK(id_isar1, boot, cur, cpu); diff |= CHECK(id_isar2, boot, cur, cpu); @@ -165,6 +166,10 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) diff |= CHECK(id_pfr0, boot, cur, cpu); diff |= CHECK(id_pfr1, boot, cur, cpu); + diff |= CHECK(mvfr0, boot, cur, cpu); + diff |= CHECK(mvfr1, boot, cur, cpu); + diff |= CHECK(mvfr2, boot, cur, cpu); + /* * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. @@ -189,6 +194,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); @@ -202,6 +208,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); -- cgit v1.2.3 From 2ec4560b7c73e6c9febc4fb2a3e6af257c904979 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:41 -0700 Subject: arm64: fix missing asm/pgtable-hwdef.h include in asm/processor.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: ./arch/arm64/include/asm/processor.h:47:32: error: ‘PHYS_MASK’ undeclared (first use in this function) Fix by including asm/pgtable-hwdef.h, where PHYS_MASK is defined. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index f131a98e1e64..f9be30ea1cbd 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -31,6 +31,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From 082471a8efe1a91d4e44abec202d9e3067dcec91 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:41 -0700 Subject: arm64: fix missing linux/bug.h include in asm/arch_timer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: ./arch/arm64/include/asm/arch_timer.h:112:2: error: implicit declaration of function ‘BUG’ [-Werror=implicit-function-declaration] Fix by including linux/bug.h, where the BUG macro is defined. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. 
Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index b1fa4e614718..fbe0ca31a99c 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -21,6 +21,7 @@ #include +#include #include #include -- cgit v1.2.3 From 2c2b282d001e9934adeac93c10eb037b81d532f5 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:41 -0700 Subject: arm64: fix missing asm/alternative.h include in kernel/module.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: arch/arm64/kernel/module.c:408:4: error: implicit declaration of function ‘apply_alternatives’ [-Werror=implicit-function-declaration] Fix by including asm/alternative.h, where the apply_alternatives() prototype is declared. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/module.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fd027b101de5..9b6f71db2709 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 59c68329a00eee7759568bc7a5383407d0d40be1 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:42 -0700 Subject: arm64: fix missing asm/io.h include in kernel/smp_spin_table.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: arch/arm64/kernel/smp_spin_table.c:80:2: error: implicit declaration of function ‘ioremap_cache’ [-Werror=implicit-function-declaration] arch/arm64/kernel/smp_spin_table.c:92:2: error: implicit declaration of function ‘writeq_relaxed’ [-Werror=implicit-function-declaration] arch/arm64/kernel/smp_spin_table.c:101:2: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] Fix by including asm/io.h, which contains definitions or prototypes for these macros or functions. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. 
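[Editor's note: the four include fixes above share one failure mode: a file uses a symbol whose header it never includes directly, so it builds only while some other header pulls the declaration in transitively, and breaks as soon as that include chain is reshuffled. A minimal illustration (hypothetical file, not from the tree):

	/* without #include <linux/bug.h> ... */
	void f(void)
	{
		BUG();	/* error: implicit declaration of function 'BUG' */
	}

Including the header that owns each symbol keeps such files immune to unrelated include-chain changes.]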
Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/smp_spin_table.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 4f93c67e63de..14944e5b28da 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -25,6 +25,7 @@ #include #include #include +#include #include extern void secondary_holding_pen(void); -- cgit v1.2.3 From 841ee230253f2ceb647f89a218e6e0575d961435 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Dec 2014 19:43:36 +0000 Subject: ARM: wire up execveat syscall Signed-off-by: Russell King --- arch/arm/include/uapi/asm/unistd.h | 1 + arch/arm/kernel/calls.S | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 705bb7620673..0c3f5a0dafd3 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -413,6 +413,7 @@ #define __NR_getrandom (__NR_SYSCALL_BASE+384) #define __NR_memfd_create (__NR_SYSCALL_BASE+385) #define __NR_bpf (__NR_SYSCALL_BASE+386) +#define __NR_execveat (__NR_SYSCALL_BASE+387) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index e51833f8cc38..05745eb838c5 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -396,6 +396,7 @@ CALL(sys_getrandom) /* 385 */ CALL(sys_memfd_create) CALL(sys_bpf) + CALL(sys_execveat) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted -- cgit v1.2.3 From cca547e9aa3a6d561fe65e75a4bb2c18d80c541a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Dec 2014 17:57:38 +0100 Subject: ARM: 8249/1: mm: dump: don't skip regions Currently the arm page table dumping code starts dumping page tables from USER_PGTABLES_CEILING. This is unnecessary for skipping any entries related to userspace as the swapper_pg_dir does not contain such entries, and results in a couple of unfortuante side effects. Firstly, any kernel mappings which might exist below USER_PGTABLES_CEILING will not be accounted in the dump output. This masks any entries erroneously created below this address. Secondly, if the final page table entry walked is part of a valid mapping the page table dumping code will not log the region this entry is part of, as the final note_page call in walk_pgd will trigger an early return when 0 < USER_PGTABLES_CEILING. Luckily this isn't seen on contemporary systems as they typically don't have enough RAM to extend the linear mapping right to the end of the address space. Due to the way addr is constructed in the walk_* functions, it can never be less than USER_PGTABLES_CEILING when walking the page tables, so it is not necessary to avoid dereferencing invalid table addresses. The existing checks for st->current_prot and st->marker[1].start_address are sufficient to ensure we will not print and/or dereference garbage when trying to log information. This patch removes both problematic uses of USER_PGTABLES_CEILING from the arm page table dumping code, preventing both of these issues. We will now report any low mappings, and the final note_page call will not return early, ensuring all regions are logged. 
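[Editor's note: the guard being removed is what caused the second side effect described above; with it in place, the closing note_page(&st, 0, 0, 0) call at the end of walk_pgd() (assumed from the tree, it is not visible in this hunk) returned before flushing the final region:

	/* removed in the diff below: */
	if (addr < USER_PGTABLES_CEILING)
		return;
]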
Signed-off-by: Mark Rutland Cc: Steve Capper Cc: Kees Cook Cc: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/dump.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 59424937e52b..9fe8e241335c 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -220,9 +220,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u static const char units[] = "KMGTPE"; u64 prot = val & pg_level[level].mask; - if (addr < USER_PGTABLES_CEILING) - return; - if (!st->level) { st->level = level; st->current_prot = prot; @@ -308,15 +305,13 @@ static void walk_pgd(struct seq_file *m) pgd_t *pgd = swapper_pg_dir; struct pg_state st; unsigned long addr; - unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE; + unsigned i; memset(&st, 0, sizeof(st)); st.seq = m; st.marker = address_markers; - pgd += pgdoff; - - for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) { + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { addr = i * PGDIR_SIZE; if (!pgd_none(*pgd)) { walk_pud(&st, pgd, addr); -- cgit v1.2.3 From ac08468867e99bc02b22baf4e58bc3537e9d852c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 23 Dec 2014 19:36:55 +0100 Subject: ARM: 8253/1: mm: use phys_addr_t type in map_lowmem() for kernel mem region Now local variables kernel_x_start and kernel_x_end defined using 'unsigned long' type which is wrong because they represent physical memory range and will be calculated wrongly if LPAE is enabled. As result, all following code in map_lowmem() will not work correctly. For example, Keystone 2 boot is broken because kernel_x_start == 0x0000 0000 kernel_x_end == 0x0080 0000 instead of kernel_x_start == 0x0000 0008 0000 0000 kernel_x_end == 0x0000 0008 0080 0000 and as result whole low memory will be mapped with MT_MEMORY_RW permissions by code (start > kernel_x_end): } else if (start >= kernel_x_end) { map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); map.length = end - start; map.type = MT_MEMORY_RW; create_mapping(&map); } Hence, fix it by using phys_addr_t type for variables kernel_x_start and kernel_x_end. Tested-by: Murali Karicheri Signed-off-by: Grygorii Strashko Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index cda7c40999b6..4e6ef896c619 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1329,8 +1329,8 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; - unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ for_each_memblock(memory, reg) { -- cgit v1.2.3 From fbc89c952f004fb9191c23605a1428df6dd39a90 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 7 Jan 2015 11:00:02 +0100 Subject: s390/mm: avoid using pmd_to_page for !USE_SPLIT_PMD_PTLOCKS pmd_to_page() is only available if USE_SPLIT_PMD_PTLOCKS is defined. The use of pmd_to_page in the gmap code can cause compile errors if NR_CPUS is smaller than SPLIT_PTLOCK_CPUS. Do not use pmd_to_page outside of USE_SPLIT_PMD_PTLOCKS sections. 
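[Editor's note: the open-coded fallback works because s390 segment tables are naturally aligned to their size, so masking any entry address recovers the table base (an illustrative restatement of the hunk below):

	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);	/* table-size alignment */
	page = virt_to_page((void *)((unsigned long) entry & mask));

virt_to_page() is available in every configuration, unlike pmd_to_page(), which exists only under USE_SPLIT_PMD_PTLOCKS.]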
Reported-by: Mike Frysinger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 71c7eff2c89f..601deb85d2a0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -322,11 +322,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, static unsigned long __gmap_segment_gaddr(unsigned long *entry) { struct page *page; - unsigned long offset; + unsigned long offset, mask; offset = (unsigned long) entry / sizeof(unsigned long); offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_to_page((pmd_t *) entry); + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + page = virt_to_page((void *)((unsigned long) entry & mask)); return page->index + offset; } -- cgit v1.2.3 From 0e63ea48b4d8035dd0e91a3fa6fb79458b47adfb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Jan 2015 09:54:58 +0000 Subject: arm64/efi: add missing call to early_ioremap_reset() The early ioremap support introduced by patch bf4b558eba92 ("arm64: add early_ioremap support") failed to add a call to early_ioremap_reset() at an appropriate time. Without this call, invocations of early_ioremap etc. that are done too late will go unnoticed and may cause corruption. This is exactly what happened when the first user of this feature was added in patch f84d02755f5a ("arm64: add EFI runtime services"). The early mapping of the EFI memory map is unmapped during an early initcall, at which time the early ioremap support is long gone. Fix by adding the missing call to early_ioremap_reset() to setup_arch(), and move the offending early_memunmap() to right after the point where the early mapping of the EFI memory map is last used. Fixes: f84d02755f5a ("arm64: add EFI runtime services") Cc: Signed-off-by: Leif Lindholm Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/efi.c | 2 +- arch/arm64/kernel/setup.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 6fac253bc783..2bb4347d0edf 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -326,6 +326,7 @@ void __init efi_idmap_init(void) /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ efi_setup_idmap(); + early_memunmap(memmap.map, memmap.map_end - memmap.map); } static int __init remap_region(efi_memory_desc_t *md, void **new) @@ -380,7 +381,6 @@ static int __init arm64_enter_virtual_mode(void) } mapsize = memmap.map_end - memmap.map; - early_memunmap(memmap.map, mapsize); if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b80991166754..20fe2932ad0c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,6 +402,7 @@ void __init setup_arch(char **cmdline_p) request_standard_resources(); efi_idmap_init(); + early_ioremap_reset(); unflatten_device_tree(); -- cgit v1.2.3 From 701a261ad6c4c1915861673b7e8ab9fee5cef69a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 7 Jan 2015 09:08:54 -0500 Subject: x86/xen: Free bootmem in free_p2m_page() during early boot With recent changes in p2m we now have legitimate cases when p2m memory needs to be freed during early boot (i.e. before slab is initialized). 
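[Editor's note: this makes free_p2m_page() symmetric with alloc_p2m_page() across the early-boot boundary; roughly, with the bootmem branch of the allocation side assumed (only its slab branch is visible as context in the hunk below):

	void *p = slab_is_available()
			? (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT)
			: alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);	/* assumed */
	/* whichever allocator handed the page out must take it back */
]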
Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/p2m.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index edbc7a63fd73..cab221da5f10 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -167,10 +167,13 @@ static void * __ref alloc_p2m_page(void) return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); } -/* Only to be called in case of a race for a page just allocated! */ -static void free_p2m_page(void *p) +static void __ref free_p2m_page(void *p) { - BUG_ON(!slab_is_available()); + if (unlikely(!slab_is_available())) { + free_bootmem((unsigned long)p, PAGE_SIZE); + return; + } + free_page((unsigned long)p); } -- cgit v1.2.3 From bc7142cf798ae77628ae8c29bfdf6aa6dd2378e9 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 7 Jan 2015 11:01:08 +0000 Subject: x86/xen: don't count how many PFNs are identity mapped This accounting is just used to print a diagnostic message that isn't very useful. Signed-off-by: David Vrabel Reviewed-by: Juergen Gross --- arch/x86/xen/setup.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index dfd77dec8e2b..664dffc29b6b 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -229,15 +229,14 @@ static int __init xen_free_mfn(unsigned long mfn) * as a fallback if the remapping fails. */ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, - unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, - unsigned long *released) + unsigned long end_pfn, unsigned long nr_pages, unsigned long *released) { - unsigned long len = 0; unsigned long pfn, end; int ret; WARN_ON(start_pfn > end_pfn); + /* Release pages first. 
*/ end = min(end_pfn, nr_pages); for (pfn = start_pfn; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); @@ -250,16 +249,14 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); if (ret == 1) { + (*released)++; if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) break; - len++; } else break; } - /* Need to release pages first */ - *released += len; - *identity += set_phys_range_identity(start_pfn, end_pfn); + set_phys_range_identity(start_pfn, end_pfn); } /* @@ -318,7 +315,6 @@ static void __init xen_do_set_identity_and_remap_chunk( unsigned long ident_pfn_iter, remap_pfn_iter; unsigned long ident_end_pfn = start_pfn + size; unsigned long left = size; - unsigned long ident_cnt = 0; unsigned int i, chunk; WARN_ON(size == 0); @@ -347,8 +343,7 @@ static void __init xen_do_set_identity_and_remap_chunk( xen_remap_mfn = mfn; /* Set identity map */ - ident_cnt += set_phys_range_identity(ident_pfn_iter, - ident_pfn_iter + chunk); + set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk); left -= chunk; } @@ -371,7 +366,7 @@ static void __init xen_do_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk( const struct e820entry *list, size_t map_size, unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, - unsigned long *identity, unsigned long *released) + unsigned long *released) { unsigned long pfn; unsigned long i = 0; @@ -386,8 +381,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Do not remap pages beyond the current allocation */ if (cur_pfn >= nr_pages) { /* Identity map remaining pages */ - *identity += set_phys_range_identity(cur_pfn, - cur_pfn + size); + set_phys_range_identity(cur_pfn, cur_pfn + size); break; } if (cur_pfn + size > nr_pages) @@ -398,7 +392,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( if (!remap_range_size) { pr_warning("Unable to find available pfn range, not remapping identity pages\n"); xen_set_identity_and_release_chunk(cur_pfn, - cur_pfn + left, nr_pages, identity, released); + cur_pfn + left, nr_pages, released); break; } /* Adjust size to fit in current e820 RAM region */ @@ -410,7 +404,6 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Update variables to reflect new mappings. */ i += size; remap_pfn += size; - *identity += size; } /* @@ -430,7 +423,6 @@ static void __init xen_set_identity_and_remap( unsigned long *released) { phys_addr_t start = 0; - unsigned long identity = 0; unsigned long last_pfn = nr_pages; const struct e820entry *entry; unsigned long num_released = 0; @@ -460,14 +452,13 @@ static void __init xen_set_identity_and_remap( last_pfn = xen_set_identity_and_remap_chunk( list, map_size, start_pfn, end_pfn, nr_pages, last_pfn, - &identity, &num_released); + &num_released); start = end; } } *released = num_released; - pr_info("Set %ld page(s) to 1-1 mapping\n", identity); pr_info("Released %ld page(s)\n", num_released); } -- cgit v1.2.3 From a97dae1a2e92e728d28515e88e8eda151f5796f5 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 7 Jan 2015 11:21:50 +0000 Subject: x86/xen: add extra memory for remapped frames during setup If the non-RAM regions in the e820 memory map are larger than the size of the initial balloon, a BUG was triggered as the frames are remaped beyond the limit of the linear p2m. The frames are remapped into the initial balloon area (xen_extra_mem) but not enough of this is available. 
Ensure enough extra memory regions are added for these remapped frames. Signed-off-by: David Vrabel Reviewed-by: Juergen Gross --- arch/x86/xen/setup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 664dffc29b6b..feb6d86fa0a0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -366,7 +366,7 @@ static void __init xen_do_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk( const struct e820entry *list, size_t map_size, unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, - unsigned long *released) + unsigned long *released, unsigned long *remapped) { unsigned long pfn; unsigned long i = 0; @@ -404,6 +404,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Update variables to reflect new mappings. */ i += size; remap_pfn += size; + *remapped += size; } /* @@ -420,12 +421,13 @@ static void __init xen_set_identity_and_remap( const struct e820entry *list, size_t map_size, unsigned long nr_pages, - unsigned long *released) + unsigned long *released, unsigned long *remapped) { phys_addr_t start = 0; unsigned long last_pfn = nr_pages; const struct e820entry *entry; unsigned long num_released = 0; + unsigned long num_remapped = 0; int i; /* @@ -452,12 +454,13 @@ static void __init xen_set_identity_and_remap( last_pfn = xen_set_identity_and_remap_chunk( list, map_size, start_pfn, end_pfn, nr_pages, last_pfn, - &num_released); + &num_released, &num_remapped); start = end; } } *released = num_released; + *remapped = num_remapped; pr_info("Released %ld page(s)\n", num_released); } @@ -577,6 +580,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long remapped_pages; int i; int op; @@ -626,9 +630,10 @@ char * __init xen_memory_setup(void) * underlying RAM. */ xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, - &xen_released_pages); + &xen_released_pages, &remapped_pages); extra_pages += xen_released_pages; + extra_pages += remapped_pages; /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO -- cgit v1.2.3 From 7be0772d19103b3eac3c2e9ac325df2563273fdc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 5 Jan 2015 16:27:51 +0100 Subject: x86/xen: avoid freeing static 'name' when kasprintf() fails In case kasprintf() fails in xen_setup_timer() we assign name to the static string "". We, however, don't check for that before issuing kfree() in xen_teardown_timer(), so the kernel crashes with 'kernel BUG at mm/slub.c:3341!' Solve the issue by making name a fixed-length string inside struct xen_clock_event_device. 16 bytes should be enough.
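To make the failure mode concrete, here is a minimal userspace sketch of the bug class: an unconditional free() of a pointer that may reference a static fallback string. The helper and its names are illustrative, not the kernel code:

	#include <stdio.h>
	#include <stdlib.h>

	static char *make_name(int cpu)
	{
		char *name = malloc(16);

		if (name)
			snprintf(name, 16, "timer%d", cpu);
		else
			name = "";	/* fallback points into static read-only storage */
		return name;
	}

	int main(void)
	{
		char *name = make_name(0);

		/*
		 * An unconditional free(name) is undefined behavior whenever
		 * the allocation failed. Embedding a fixed-size name[16] in
		 * the owning struct, as the patch does, removes the free
		 * entirely. (The guard below works only because a successful
		 * name is never empty.)
		 */
		if (name[0] != '\0')
			free(name);
		return 0;
	}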
Suggested-by: Laszlo Ersek Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel --- arch/x86/xen/time.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 23019b483908..69087341d9ae 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -391,7 +391,7 @@ static const struct clock_event_device *xen_clockevent = struct xen_clock_event_device { struct clock_event_device evt; - char *name; + char name[16]; }; static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 }; @@ -420,39 +420,33 @@ void xen_teardown_timer(int cpu) if (evt->irq >= 0) { unbind_from_irqhandler(evt->irq, NULL); evt->irq = -1; - kfree(per_cpu(xen_clock_events, cpu).name); - per_cpu(xen_clock_events, cpu).name = NULL; } } void xen_setup_timer(int cpu) { - char *name; - struct clock_event_device *evt; + struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu); + struct clock_event_device *evt = &xevt->evt; int irq; - evt = &per_cpu(xen_clock_events, cpu).evt; WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu); if (evt->irq >= 0) xen_teardown_timer(cpu); printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); - name = kasprintf(GFP_KERNEL, "timer%d", cpu); - if (!name) - name = ""; + snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu); irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, - name, NULL); + xevt->name, NULL); (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); memcpy(evt, xen_clockevent, sizeof(*evt)); evt->cpumask = cpumask_of(cpu); evt->irq = irq; - per_cpu(xen_clock_events, cpu).name = name; } -- cgit v1.2.3 From 9de93e7873f5f6c4d0768649d404703a62a51610 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Jan 2015 14:32:26 -0800 Subject: arch/blackfin/mach-bf533/boards/stamp.c: add linux/delay.h This fixes the build error: arch/blackfin/mach-bf533/boards/stamp.c:834:2: error: implicit declaration of function 'mdelay' Signed-off-by: Oleg Nesterov Reported-by: Wu Fengguang Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/blackfin/mach-bf533/boards/stamp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index 6f4bac969bf7..23eada79439c 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -7,6 +7,7 @@ */ #include +#include <linux/delay.h> #include #include #include -- cgit v1.2.3 From df3eed3d282f2fe1ffb73d3545fcde4e9b80a0d3 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 8 Jan 2015 14:36:21 +0100 Subject: s390/bpf: Fix ALU_NEG (A = -A) Currently the LOAD NEGATIVE (lnr) instruction is used for ALU_NEG. This instruction always loads the negative value. Therefore, if A is already negative, it remains unchanged. To fix this, use LOAD COMPLEMENT (lcr) instead.
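The semantic difference between the two instructions is easy to demonstrate with C stand-ins (the two helpers below are illustrative models of lnr and lcr, not real emulation code):

	#include <assert.h>
	#include <stdint.h>

	/* LOAD NEGATIVE: always produces -|x|. */
	static int32_t lnr(int32_t x) { return x < 0 ? x : -x; }

	/* LOAD COMPLEMENT: produces the two's complement, i.e. -x. */
	static int32_t lcr(int32_t x) { return -x; }

	int main(void)
	{
		assert(lnr(5) == -5 && lcr(5) == -5);	/* both look correct for A >= 0 */
		assert(lnr(-5) == -5);			/* lnr leaves a negative A unchanged */
		assert(lcr(-5) == 5);			/* lcr implements A = -A correctly */
		return 0;
	}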
Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index c52ac77408ca..8bc474fb52fd 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -431,8 +431,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, EMIT4_DISP(0x88500000, K); break; case BPF_ALU | BPF_NEG: /* A = -A */ - /* lnr %r5,%r5 */ - EMIT2(0x1155); + /* lcr %r5,%r5 */ + EMIT2(0x1355); break; case BPF_JMP | BPF_JA: /* ip += K */ offset = addrs[i + K] + jit->start - jit->prg; -- cgit v1.2.3 From ae750974591bb9431b1f84b1323dc2fb7d8fe360 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Thu, 8 Jan 2015 14:46:18 +0100 Subject: s390/bpf: Fix JMP_JGE_X (A >= X) and JMP_JGT_X (A > X) Currently the signed COMPARE (cr) instruction is used to compare "A" with "X". This is not correct because "A" and "X" are both unsigned. To fix this, use the unsigned COMPARE LOGICAL (clr) instruction instead. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8bc474fb52fd..524496d47ef5 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -502,8 +502,8 @@ branch: if (filter->jt == filter->jf) { xbranch: /* Emit compare if the branch targets are different */ if (filter->jt != filter->jf) { jit->seen |= SEEN_XREG; - /* cr %r5,%r12 */ - EMIT2(0x195c); + /* clr %r5,%r12 */ + EMIT2(0x155c); } goto branch; case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ -- cgit v1.2.3 From 0f363b250b15af0f218bb2876d101fe5cd413f8b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Dec 2014 11:46:14 +0100 Subject: x86: Fix off-by-one in instruction decoder Stephane reported that the PEBS fixup was broken by the recent commit to the instruction decoder. The thing had an off-by-one which meant the last instruction in the buffer could never be decoded, so the fixup would always bail.
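The boundary condition is easiest to see in a simplified model of the decoder's bounds check (a sketch assuming end_kaddr points one past the last valid byte, as the patch's comparison implies; this is not the kernel code itself):

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct insn {
		const uint8_t *next_byte;
		const uint8_t *end_kaddr;	/* one past the last valid byte */
	};

	static bool validate_next(const struct insn *insn, size_t bytes)
	{
		return insn->next_byte + bytes <= insn->end_kaddr;	/* was '<' */
	}

	int main(void)
	{
		uint8_t buf[4] = { 0 };
		struct insn insn = { .next_byte = buf + 3, .end_kaddr = buf + 4 };

		/* One byte wanted, one byte left: in bounds, but '<' rejected it. */
		printf("%d\n", validate_next(&insn, 1));	/* prints 1 */
		return 0;
	}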
Reported-by: Stephane Eranian Fixes: 6ba48ff46f76 ("x86: Remove arbitrary instruction size limit in instruction decoder") Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org # 3.18 Cc: Cc: Jiri Olsa Cc: Liang Kan Cc: Arnaldo Carvalho de Melo Cc: Dave Hansen Cc: Jim Keniston Cc: Linus Torvalds Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/20141216104614.GV3337@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/lib/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 2480978b31cc..1313ae6b478b 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -28,7 +28,7 @@ /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ - ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr) + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) -- cgit v1.2.3 From 88a7c26af8dab2f2d69f5a6067eb670694ec38c0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 4 Jan 2015 10:36:19 -0800 Subject: perf: Move task_pt_regs sampling into arch code On x86_64, at least, task_pt_regs may be only partially initialized in many contexts, so x86_64 should not use it without extra care from interrupt context, let alone NMI context. This will allow x86_64 to override the logic and will supply some scratch space to use to make a cleaner copy of user regs. Tested-by: Jiri Olsa Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: chenggang.qcg@taobao.com Cc: Wu Fengguang Cc: Namhyung Kim Cc: Mike Galbraith Cc: Arjan van de Ven Cc: David Ahern Cc: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Jean Pihet Cc: Linus Torvalds Cc: Mark Salter Cc: Russell King Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/e431cd4c18c2e1c44c774f10758527fb2d1025c4.1420396372.git.luto@amacapital.net Signed-off-by: Ingo Molnar --- arch/arm/kernel/perf_regs.c | 8 ++++++++ arch/arm64/kernel/perf_regs.c | 8 ++++++++ arch/x86/kernel/perf_regs.c | 16 ++++++++++++++++ include/linux/perf_event.h | 12 +++++++----- include/linux/perf_regs.h | 16 ++++++++++++++++ kernel/events/core.c | 19 ++++++++----------- 6 files changed, 63 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 6e4379c67cbc..592dda3f21ff 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -28,3 +28,11 @@ u64 perf_reg_abi(struct task_struct *task) { return PERF_SAMPLE_REGS_ABI_32; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 6762ad705587..3f62b35fb6f1 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -50,3 +50,11 @@ u64 perf_reg_abi(struct task_struct *task) else return PERF_SAMPLE_REGS_ABI_64; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index e309cc5c276e..3bbbb1a4fb52 100644 --- a/arch/x86/kernel/perf_regs.c +++ 
b/arch/x86/kernel/perf_regs.c @@ -78,6 +78,14 @@ u64 perf_reg_abi(struct task_struct *task) { return PERF_SAMPLE_REGS_ABI_32; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} #else /* CONFIG_X86_64 */ #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ (1ULL << PERF_REG_X86_ES) | \ @@ -102,4 +110,12 @@ u64 perf_reg_abi(struct task_struct *task) else return PERF_SAMPLE_REGS_ABI_64; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 486e84ccb1f9..4f7a61ca4b39 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -79,11 +79,6 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; -struct perf_regs { - __u64 abi; - struct pt_regs *regs; -}; - struct task_struct; /* @@ -610,7 +605,14 @@ struct perf_sample_data { u32 reserved; } cpu_entry; struct perf_callchain_entry *callchain; + + /* + * regs_user may point to task_pt_regs or to regs_user_copy, depending + * on arch details. + */ struct perf_regs regs_user; + struct pt_regs regs_user_copy; + struct perf_regs regs_intr; u64 stack_user_size; } ____cacheline_aligned; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 3c73d5fe18be..a5f98d53d732 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,11 +1,19 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +struct perf_regs { + __u64 abi; + struct pt_regs *regs; +}; + #ifdef CONFIG_HAVE_PERF_REGS #include u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy); #else static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { @@ -21,5 +29,13 @@ static inline u64 perf_reg_abi(struct task_struct *task) { return PERF_SAMPLE_REGS_ABI_NONE; } + +static inline void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} #endif /* CONFIG_HAVE_PERF_REGS */ #endif /* _LINUX_PERF_REGS_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 4c1ee7f2bebc..882f835a0d85 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4461,18 +4461,14 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs) + struct pt_regs *regs, + struct pt_regs *regs_user_copy) { - if (!user_mode(regs)) { - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - regs_user->abi = perf_reg_abi(current); + if (user_mode(regs)) { + regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; + } else if (current->mm) { + perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -4951,7 +4947,8 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs); + 
perf_sample_regs_user(&data->regs_user, regs, + &data->regs_user_copy); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ -- cgit v1.2.3 From 86c269fea37334687b1c0789e6444be0d750e8a6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 4 Jan 2015 10:36:20 -0800 Subject: perf/x86_64: Improve user regs sampling Perf reports user regs for kernel-mode samples so that samples can be backtraced through user code. The old code was very broken in syscall context, resulting in useless backtraces. The new code, in contrast, is still dangerously racy, but it should at least work most of the time. Tested-by: Jiri Olsa Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: Andrew Morton Cc: chenggang.qcg@taobao.com Cc: Wu Fengguang Cc: Namhyung Kim Cc: Mike Galbraith Cc: Arjan van de Ven Cc: David Ahern Cc: Linus Torvalds Link: http://lkml.kernel.org/r/243560c26ff0f739978e2459e203f6515367634d.1420396372.git.luto@amacapital.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/perf_regs.c | 78 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 3bbbb1a4fb52..781861cc5ee8 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -115,7 +115,81 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs, struct pt_regs *regs_user_copy) { - regs_user->regs = task_pt_regs(current); - regs_user->abi = perf_reg_abi(current); + struct pt_regs *user_regs = task_pt_regs(current); + + /* + * If we're in an NMI that interrupted task_pt_regs setup, then + * we can't sample user regs at all. This check isn't really + * sufficient, though, as we could be in an NMI inside an interrupt + * that happened during task_pt_regs setup. + */ + if (regs->sp > (unsigned long)&user_regs->r11 && + regs->sp <= (unsigned long)(user_regs + 1)) { + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; + regs_user->regs = NULL; + return; + } + + /* + * RIP, flags, and the argument registers are usually saved. + * orig_ax is probably okay, too. + */ + regs_user_copy->ip = user_regs->ip; + regs_user_copy->cx = user_regs->cx; + regs_user_copy->dx = user_regs->dx; + regs_user_copy->si = user_regs->si; + regs_user_copy->di = user_regs->di; + regs_user_copy->r8 = user_regs->r8; + regs_user_copy->r9 = user_regs->r9; + regs_user_copy->r10 = user_regs->r10; + regs_user_copy->r11 = user_regs->r11; + regs_user_copy->orig_ax = user_regs->orig_ax; + regs_user_copy->flags = user_regs->flags; + + /* + * Don't even try to report the "rest" regs. + */ + regs_user_copy->bx = -1; + regs_user_copy->bp = -1; + regs_user_copy->r12 = -1; + regs_user_copy->r13 = -1; + regs_user_copy->r14 = -1; + regs_user_copy->r15 = -1; + + /* + * For this to be at all useful, we need a reasonable guess for + * sp and the ABI. Be careful: we're in NMI context, and we're + * considering current to be the current task, so we should + * be careful not to look at any other percpu variables that might + * change during context switches. + */ + if (IS_ENABLED(CONFIG_IA32_EMULATION) && + task_thread_info(current)->status & TS_COMPAT) { + /* Easy case: we're in a compat syscall. */ + regs_user->abi = PERF_SAMPLE_REGS_ABI_32; + regs_user_copy->sp = user_regs->sp; + regs_user_copy->cs = user_regs->cs; + regs_user_copy->ss = user_regs->ss; + } else if (user_regs->orig_ax != -1) { + /* + * We're probably in a 64-bit syscall. 
+ * Warning: this code is severely racy. At least it's better + than just blindly copying user_regs. + */ + regs_user->abi = PERF_SAMPLE_REGS_ABI_64; + regs_user_copy->sp = this_cpu_read(old_rsp); + regs_user_copy->cs = __USER_CS; + regs_user_copy->ss = __USER_DS; + regs_user_copy->cx = -1; /* usually contains garbage */ + } else { + /* We're probably in an interrupt or exception. */ + regs_user->abi = user_64bit_mode(user_regs) ? + PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; + regs_user_copy->sp = user_regs->sp; + regs_user_copy->cs = user_regs->cs; + regs_user_copy->ss = user_regs->ss; + } + + regs_user->regs = regs_user_copy; } #endif /* CONFIG_X86_32 */ -- cgit v1.2.3 From 5306c31c5733cb4a79cc002e0c3ad256fd439614 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 6 Jan 2015 14:34:35 -0800 Subject: perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes There was another report of a boot failure with a #GP fault in the uncore SBOX initialization. The earlier workaround was not enough for this system. The boot was failing while trying to initialize the third SBOX. This patch detects parts with only two SBOXes and limits the number of SBOX units to two there. Stable material, as it affects boot problems on 3.18. Tested-by: Andreas Oehler Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Yan, Zheng Link: http://lkml.kernel.org/r/1420583675-9163-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_uncore.h | 2 +- arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 18eb78bbdd10..863d9b02563e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -17,7 +17,7 @@ #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff -#define UNCORE_EXTRA_PCI_DEV_MAX 2 +#define UNCORE_EXTRA_PCI_DEV_MAX 3 /* support up to 8 sockets */ #define UNCORE_SOCKET_MAX 8 diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 745b158e9a65..21af6149edf2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void) enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, + HSWEP_PCI_PCU_3, }; static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void) { if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + + /* Detect 6-8 core systems with only two SBOXes */ + if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { + u32 capid4; + + pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], + 0x94, &capid4); + if (((capid4 >> 6) & 0x3) == 0) + hswep_uncore_sbox.num_boxes = 2; + } + uncore_msr_uncores = hswep_msr_uncores; } @@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, SNBEP_PCI_QPI_PORT1_FILTER), }, + { /* PCU.3 (for Capability registers) */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), + .driver_data =
UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + HSWEP_PCI_PCU_3), }, { /* end: all zeroes */ } }; -- cgit v1.2.3 From 1e3479225acbb7ae048ac30fb7c6090fa7f0df02 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Fri, 9 Jan 2015 18:55:45 +0100 Subject: ARM: 8275/1: mm: fix PMD_SECT_RDONLY undeclared compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the v3.19-rc3 tree, when CONFIG_ARM_LPAE and CONFIG_DEBUG_RODATA are enabled, the image fails to compile with the following error: arch/arm/mm/init.c:661:14: error: ‘PMD_SECT_RDONLY’ undeclared here (not in a function) It seems that the '80d6b0c ARM: mm: allow text and rodata sections to be read-only' and 'ded9477 ARM: 8109/1: mm: Modify pte_write and pmd_write logic for LPAE' commits crossed. 80d6b0c uses the PMD_SECT_RDONLY macro but ded9477 renames it and uses the software bit L_PMD_SECT_RDONLY instead. The fix is to use L_PMD_SECT_RDONLY instead of PMD_SECT_RDONLY, as ded9477 does in other places. Signed-off-by: Victor Kamensky Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 98ad9c79ea0e..2495c8cb47ba 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -658,8 +658,8 @@ static struct section_perm ro_perms[] = { .start = (unsigned long)_stext, .end = (unsigned long)__init_begin, #ifdef CONFIG_ARM_LPAE - .mask = ~PMD_SECT_RDONLY, - .prot = PMD_SECT_RDONLY, + .mask = ~L_PMD_SECT_RDONLY, + .prot = L_PMD_SECT_RDONLY, #else .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, -- cgit v1.2.3 From f27bd5bfeda5881ce283aa06408c8fa99fb950fa Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 21 Dec 2014 11:51:22 +0100 Subject: m68k: Wire up execveat Check success of execveat(3, '../execveat', 0)... [OK] Check success of execveat(5, 'execveat', 0)... [OK] Check success of execveat(6, 'execveat', 0)... [OK] Check success of execveat(-100, '/root/selftest-exec/exec/execveat', 0)... [OK] Check success of execveat(99, '/root/selftest-exec/exec/execveat', 0)... [OK] Check success of execveat(8, '', 4096)... [OK] Check success of execveat(17, '', 4096)... [OK] Check success of execveat(9, '', 4096)... [OK] Check success of execveat(14, '', 4096)... [OK] Check success of execveat(14, '', 4096)... [OK] Check success of execveat(15, '', 4096)... [OK] Check failure of execveat(8, '', 0) with ENOENT... [OK] Check failure of execveat(8, '(null)', 4096) with EFAULT... [OK] Check success of execveat(5, 'execveat.symlink', 0)... [OK] Check success of execveat(6, 'execveat.symlink', 0)... [OK] Check success of execveat(-100, '/root/selftest-exec/...xec/execveat.symlink', 0)... [OK] Check success of execveat(10, '', 4096)... [OK] Check success of execveat(10, '', 4352)... [OK] Check failure of execveat(5, 'execveat.symlink', 256) with ELOOP... [OK] Check failure of execveat(6, 'execveat.symlink', 256) with ELOOP... [OK] Check failure of execveat(-100, '/root/selftest-exec/exec/execveat.symlink', 256) with ELOOP... [OK] Check success of execveat(3, '../script', 0)... [OK] Check success of execveat(5, 'script', 0)... [OK] Check success of execveat(6, 'script', 0)... [OK] Check success of execveat(-100, '/root/selftest-exec/exec/script', 0)... [OK] Check success of execveat(13, '', 4096)... [OK] Check success of execveat(13, '', 4352)... [OK] Check failure of execveat(18, '', 4096) with ENOENT...
[OK] Check failure of execveat(7, 'script', 0) with ENOENT... [OK] Check success of execveat(16, '', 4096)... [OK] Check success of execveat(16, '', 4096)... [OK] Check success of execveat(4, '../script', 0)... [OK] Check success of execveat(4, 'script', 0)... [OK] Check success of execveat(4, '../script', 0)... [OK] Check failure of execveat(4, 'script', 0) with ENOENT... [OK] Check failure of execveat(5, 'execveat', 65535) with EINVAL... [OK] Check failure of execveat(5, 'no-such-file', 0) with ENOENT... [OK] Check failure of execveat(6, 'no-such-file', 0) with ENOENT... [OK] Check failure of execveat(-100, 'no-such-file', 0) with ENOENT... [OK] Check failure of execveat(5, '', 4096) with EACCES... [OK] Check failure of execveat(5, 'Makefile', 0) with EACCES... [OK] Check failure of execveat(11, '', 4096) with EACCES... [OK] Check failure of execveat(12, '', 4096) with EACCES... [OK] Check failure of execveat(99, '', 4096) with EBADF... [OK] Check failure of execveat(99, 'execveat', 0) with EBADF... [OK] Check failure of execveat(8, 'execveat', 0) with ENOTDIR... [OK] Invoke copy of 'execveat' via filename of length 4093: Check success of execveat(19, '', 4096)... [OK] Check success of execveat(5, 'xxxxxxxxxxxxxxxxxxxx...yyyyyyyyyyyyyyyyyyyy', 0)... [OK] Invoke copy of 'script' via filename of length 4093: Check success of execveat(20, '', 4096)... [OK] Check success of execveat(5, 'xxxxxxxxxxxxxxxxxxxx...yyyyyyyyyyyyyyyyyyyy', 0)... [OK] Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/include/uapi/asm/unistd.h | 1 + arch/m68k/kernel/syscalltable.S | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 75e75d7b1702..244e0dbe45db 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 355 +#define NR_syscalls 356 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 2c1bec9a14b6..61fb6cb9d2ae 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -360,5 +360,6 @@ #define __NR_getrandom 352 #define __NR_memfd_create 353 #define __NR_bpf 354 +#define __NR_execveat 355 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 2ca219e184cd..a0ec4303f2c8 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -375,4 +375,5 @@ ENTRY(sys_call_table) .long sys_getrandom .long sys_memfd_create .long sys_bpf + .long sys_execveat /* 355 */ -- cgit v1.2.3 From bfe5fda8e7ced129716f5741cf7ed2592a338824 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 6 Jan 2015 21:12:08 +1100 Subject: powernv: Fix OPAL tracepoint code Patch c49f63530bb6 ("powernv: Add OPAL tracepoints") has a spurious store to the stack: ld r12,opal_tracepoint_refcount@toc(r2); \ std r12,32(r1); \ The store was originally used to save the current tracepoint status so the entry and the exit tracepoints were always balanced. In the end I just created a separate path when tracepoints are enabled. The offset on the stack used for this store is not valid for ABIv2 and it causes strange issues. I noticed it because OPAL console input was broken. 
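Why a separate path keeps the tracepoints balanced can be sketched in C (hypothetical names; the real code is powerpc assembly): the enable decision is made exactly once per call, so no state needs to be parked on the stack between entry and exit:

	#include <stdio.h>

	static int opal_tracepoint_refcount;	/* may change at any time */

	static long opal_call_traced(void)
	{
		puts("entry tracepoint");
		puts("opal call");
		puts("exit tracepoint");	/* always paired with the entry above */
		return 0;
	}

	static long opal_call_plain(void)
	{
		puts("opal call");
		return 0;
	}

	static long opal_call(void)
	{
		/*
		 * One decision, taken up front: entry and exit events stay
		 * balanced even if the refcount flips while the call is in
		 * flight, and nothing has to be saved on the stack.
		 */
		return opal_tracepoint_refcount ? opal_call_traced()
						: opal_call_plain();
	}

	int main(void)
	{
		opal_call();
		opal_tracepoint_refcount = 1;
		opal_call();
		return 0;
	}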
Fixes: c49f63530bb6 ("powernv: Add OPAL tracepoints") Cc: # v3.17+ Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 54eca8b3b288..0509bca5e830 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -40,7 +40,6 @@ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ ld r12,opal_tracepoint_refcount@toc(r2); \ - std r12,32(r1); \ cmpdi r12,0; \ bne- LABEL; \ 1: -- cgit v1.2.3 From a87e810f61b49f19bd29ea564b7cd1e92e43d989 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 8 Jan 2015 15:30:08 +1100 Subject: powerpc: Work around gcc bug in current_thread_info() In commit a3e5b356b3ab "powerpc: Don't use local named register variable in current_thread_info" Anton changed the way we did current_thread_info() to accommodate LLVM, and it was not meant to have any effect elsewhere. Unfortunately it has exposed a gcc bug, where r1 gets copied into another register and then gcc uses that register to restore the toc after a function call, even when that register is volatile and has been clobbered by the function call. We could revert Anton's patch, but it's not clear the original code is safe either; we may just have been lucky. The cleanest solution is to just use the existing CURRENT_THREAD_INFO() asm macro, and call it using inline asm. Segher points out we don't need volatile on the asm: if the result of the shift is unused, it's fine for the compiler to elide it. Fixes: a3e5b356b3ab ("powerpc: Don't use local named register variable in current_thread_info") Reported-by: Alexander Graf Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/thread_info.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index ebc4f165690a..0be6c681cab1 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -23,9 +23,9 @@ #define THREAD_SIZE (1 << THREAD_SHIFT) #ifdef CONFIG_PPC64 -#define CURRENT_THREAD_INFO(dest, sp) clrrdi dest, sp, THREAD_SHIFT +#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(clrrdi dest, sp, THREAD_SHIFT) #else -#define CURRENT_THREAD_INFO(dest, sp) rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT +#define CURRENT_THREAD_INFO(dest, sp) stringify_in_c(rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT) #endif #ifndef __ASSEMBLY__ @@ -71,12 +71,13 @@ struct thread_info { #define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT) /* how to get the thread information struct from C */ -register unsigned long __current_r1 asm("r1"); static inline struct thread_info *current_thread_info(void) { - /* gcc4, at least, is smart enough to turn this into a single - * rlwinm for ppc32 and clrrdi for ppc64 */ - return (struct thread_info *)(__current_r1 & ~(THREAD_SIZE-1)); + unsigned long val; + + asm (CURRENT_THREAD_INFO(%0,1) : "=r" (val)); + + return (struct thread_info *)val; } #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 8d1a2427d8fc0cb26ee72dfad7ad1033420089a1 Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Thu, 8 Jan 2015 16:56:01 +0100 Subject: s390/uprobes: fix user space PER events If uprobes are single-stepped, for example with gdb, the behavior should now be correct.
Before this patch, when gdb was single-stepping a uprobe, the result was a SIGILL. When PER is active for any storage alteration and a uprobe is hit, a storage alteration event is indicated. These over-indications are filtered out by gdb if no change has happened within the observed area. Signed-off-by: Jan Willeke Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/uprobes.c | 69 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index f6b3cd056ec2..cc7328080b60 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -48,6 +48,30 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) return false; } +static int check_per_event(unsigned short cause, unsigned long control, + struct pt_regs *regs) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return 0; + /* user space single step */ + if (control == 0) + return 1; + /* over indication for storage alteration */ + if ((control & 0x20200000) && (cause & 0x2000)) + return 1; + if (cause & 0x8000) { + /* all branches */ + if ((control & 0x80800000) == 0x80000000) + return 1; + /* branch into selected range */ + if (((control & 0x80800000) == 0x80800000) && + regs->psw.addr >= current->thread.per_user.start && + regs->psw.addr <= current->thread.per_user.end) + return 1; + } + return 0; +} + int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { int fixup = probe_get_fixup_type(auprobe->insn); @@ -71,9 +95,13 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (regs->psw.addr - utask->xol_vaddr == ilen) regs->psw.addr = utask->vaddr + ilen; } - /* If per tracing was active generate trap */ - if (regs->psw.mask & PSW_MASK_PER) - do_per_trap(regs); + if (check_per_event(current->thread.per_event.cause, + current->thread.per_user.control, regs)) { + /* fix per address */ + current->thread.per_event.address = utask->vaddr; + /* trigger per event */ + set_pt_regs_flag(regs, PIF_PER_TRAP); + } return 0; } @@ -106,6 +134,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) clear_thread_flag(TIF_UPROBE_SINGLESTEP); regs->int_code = auprobe->saved_int_code; regs->psw.addr = current->utask->vaddr; + current->thread.per_event.address = current->utask->vaddr; } unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, @@ -146,17 +175,20 @@ static void adjust_psw_addr(psw_t *psw, unsigned long len) __rc; \ }) -#define emu_store_ril(ptr, input) \ +#define emu_store_ril(regs, ptr, input) \ ({ \ unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(ptr) __ptr = (ptr); \ int __rc = 0; \ \ if (!test_facility(34)) \ __rc = EMU_ILLEGAL_OP; \ - else if ((u64 __force)ptr & mask) \ + else if ((u64 __force)__ptr & mask) \ __rc = EMU_SPECIFICATION; \ - else if (put_user(*(input), ptr)) \ + else if (put_user(*(input), __ptr)) \ __rc = EMU_ADDRESSING; \ + if (__rc == 0) \ + sim_stor_event(regs, __ptr, mask + 1); \ __rc; \ }) @@ -197,6 +229,25 @@ union split_register { s16 s16[4]; }; +/* + * If user per registers are setup to trace storage alterations and an + * emulated store took place on a fitting address a user trap is generated.
+ */ +static void sim_stor_event(struct pt_regs *regs, void *addr, int len) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return; + if (!(current->thread.per_user.control & PER_EVENT_STORE)) + return; + if ((void *)current->thread.per_user.start > (addr + len)) + return; + if ((void *)current->thread.per_user.end < addr) + return; + current->thread.per_event.address = regs->psw.addr; + current->thread.per_event.cause = PER_EVENT_STORE >> 16; + set_pt_regs_flag(regs, PIF_PER_TRAP); +} + /* * pc relative instructions are emulated, since parameters may not be * accessible from the xol area due to range limitations. @@ -249,13 +300,13 @@ static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) rc = emu_load_ril((u32 __user *)uptr, &rx->u64); break; case 0x07: /* sthrl */ - rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]); + rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]); break; case 0x0b: /* stgrl */ - rc = emu_store_ril((u64 __user *)uptr, &rx->u64); + rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64); break; case 0x0f: /* strl */ - rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]); + rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]); break; } break; -- cgit v1.2.3 From 82c92ed1357bca22a5d637fbb93dab3eb18a8e8c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:07 +0100 Subject: xen: correct error for building p2m list on 32 bits In xen_rebuild_p2m_list(), for large areas of invalid or identity mapped memory, the pmd entries on 32 bit systems are initialized incorrectly. Correct this error. Suggested-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/p2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab221da5f10..ff4ebd820603 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -378,7 +378,7 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) p2m_missing_pte : p2m_identity_pte; for (i = 0; i < PMDS_PER_MID_PAGE; i++) { pmdp = populate_extra_pmd( - (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); + (unsigned long)(p2m + pfn) + i * PMD_SIZE); set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); } } -- cgit v1.2.3 From f241b0b891c903da2465b7a98eaf650784e666da Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:08 +0100 Subject: xen: correct race in alloc_p2m_pmd() When allocating a new pmd for the linear mapped p2m list, a check is done to avoid introducing another pmd when one has just been introduced on another cpu. In this case the old pte pointer was returned, which points to the p2m_missing or p2m_identity page. The correct value would be the pointer to the found new page. Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/p2m.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index ff4ebd820603..70fb5075c901 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -439,10 +439,9 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual * pmd. In case of PAE/x86-32 there are multiple pmds to allocate!
*/ -static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { pte_t *ptechk; - pte_t *pteret = ptep; pte_t *pte_newpg[PMDS_PER_MID_PAGE]; pmd_t *pmdp; unsigned int level; @@ -476,8 +475,6 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) if (ptechk == pte_pg) { set_pmd(pmdp, __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); - if (vaddr == (addr & ~(PMD_SIZE - 1))) - pteret = pte_offset_kernel(pmdp, addr); pte_newpg[i] = NULL; } @@ -491,7 +488,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) vaddr += PMD_SIZE; } - return pteret; + return lookup_address(addr, &level); } /* @@ -520,7 +517,7 @@ static bool alloc_p2m(unsigned long pfn) if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { /* PMD level is missing, allocate a new one */ - ptep = alloc_p2m_pmd(addr, ptep, pte_pg); + ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) return false; } -- cgit v1.2.3 From e86f949667127509d95b6c678fdd928b93128d9d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:09 +0100 Subject: xen: use correct type for physical addresses When converting a pfn to a physical address, be sure to use 64 bit wide types, or convert the physical address to a pfn if possible. Signed-off-by: Juergen Gross Tested-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index feb6d86fa0a0..410210f279bf 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -140,7 +140,7 @@ static void __init xen_del_extra_mem(u64 start, u64 size) unsigned long __ref xen_chk_extra_mem(unsigned long pfn) { int i; - unsigned long addr = PFN_PHYS(pfn); + phys_addr_t addr = PFN_PHYS(pfn); for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { if (addr >= xen_extra_mem[i].start && @@ -284,7 +284,7 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) } /* Update kernel mapping, but not for highmem. */ - if ((pfn << PAGE_SHIFT) >= __pa(high_memory)) + if (pfn >= PFN_UP(__pa(high_memory - 1))) return; if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), -- cgit v1.2.3 From 9a17ad7f3db17db0c6375de96672f16ab1aa51ae Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:10 +0100 Subject: xen: check for zero sized area when invalidating memory With the introduction of the linear mapped p2m list, setting memory areas to "invalid" had to be delayed. When doing the invalidation, make sure no zero-sized areas are processed.
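The need for an explicit size check follows from the page rounding; a small sketch (the PFN macros mirror the kernel's definitions, the values are made up) shows that an unaligned, zero-sized region still yields a non-empty pfn range:

	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)
	#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
	#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

	int main(void)
	{
		unsigned long start = 0x1800;	/* not page aligned */
		unsigned long size = 0;		/* empty region */

		/* Prints pfn_s=1 pfn_e=2: one spurious pfn despite size == 0. */
		printf("pfn_s=%lu pfn_e=%lu\n", PFN_DOWN(start), PFN_UP(start + size));
		return 0;
	}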
Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 410210f279bf..865e56cea7a0 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -160,6 +160,8 @@ void __init xen_inv_extra_mem(void) int i; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + if (!xen_extra_mem[i].size) + continue; pfn_s = PFN_DOWN(xen_extra_mem[i].start); pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); for (pfn = pfn_s; pfn < pfn_e; pfn++) -- cgit v1.2.3 From 55e858b75808347378e5117c3c2339f46cc03575 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 11 Jan 2015 14:10:10 +0100 Subject: arm64: KVM: Fix TLB invalidation by IPA/VMID It took about two years for someone to notice that the IPA passed to TLBI IPAS2E1IS must be shifted by 12 bits. Clearly our reviewing is not as good as it should be... Paper bag time for me. Reported-by: Mario Smarduch Tested-by: Mario Smarduch Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/hyp.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index fbe909fb0a1a..c3ca89c27c6b 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -1014,6 +1014,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) * Instead, we invalidate Stage-2 for this IPA, and the * whole of Stage-1. Weep... */ + lsr x1, x1, #12 tlbi ipas2e1is, x1 /* * We have to ensure completion of the invalidation at Stage-2, -- cgit v1.2.3 From 801f6772cecea6cfc7da61aa197716ab64db5f9e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 11 Jan 2015 14:10:11 +0100 Subject: arm64: KVM: Fix HCR setting for 32bit guests Commit b856a59141b1 (arm/arm64: KVM: Reset the HCR on each vcpu when resetting the vcpu) moved the init of the HCR register to happen later in the init of a vcpu, but left out the fixup done in kvm_reset_vcpu when preparing for a 32bit guest. As a result, the 32bit guest is run as a 64bit guest, but the rest of the kernel still manages it as a 32bit one. Fun follows. Moving the fixup to vcpu_reset_hcr solves the problem for good.
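A hedged sketch of the ordering problem (the constants are placeholders, not the real HCR bit layout): a fixup applied before the register is (re)initialized is silently overwritten, which is why the patch moves the fixup into the init helper itself:

	#include <stdio.h>

	#define HCR_RW		(1UL << 31)		/* illustrative: run EL1 in 64bit mode */
	#define HCR_GUEST_FLAGS	(HCR_RW | 0x3fUL)	/* illustrative default guest flags */

	static unsigned long hcr_el2;

	static void vcpu_reset_hcr(int guest_is_32bit)
	{
		hcr_el2 = HCR_GUEST_FLAGS;
		if (guest_is_32bit)
			hcr_el2 &= ~HCR_RW;	/* fixup now runs after the init */
	}

	int main(void)
	{
		/*
		 * Old flow: kvm_reset_vcpu() cleared HCR_RW first, then the
		 * later HCR init restored the default flags and the clearing
		 * was lost. Doing both steps in vcpu_reset_hcr() makes the
		 * fixup stick.
		 */
		vcpu_reset_hcr(1);
		printf("HCR_RW is %s\n", (hcr_el2 & HCR_RW) ? "set" : "clear");
		return 0;
	}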
Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_emulate.h | 2 ++ arch/arm64/kvm/reset.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 8127e45e2637..865a7e28ea2d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -41,6 +41,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; + if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) + vcpu->arch.hcr_el2 &= ~HCR_RW; } static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 70a7816535cd..0b4326578985 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -90,7 +90,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (!cpu_has_32bit_el1()) return -EINVAL; cpu_reset = &default_regs_reset32; - vcpu->arch.hcr_el2 &= ~HCR_RW; } else { cpu_reset = &default_regs_reset; } -- cgit v1.2.3 From f221b04fe07eb56c39935e31bb8e9ddacc00612f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 13 Jan 2015 07:40:05 +0000 Subject: x86/xen: properly retrieve NMI reason Using the native code here can't work properly, as the hypervisor would normally have cleared the two reason bits by the time Dom0 gets to see the NMI (if passed to it at all). There's a shared info field for this, and there's an existing hook to use - just fit the two together. This is particularly relevant so that NMIs intended to be handled by APEI / GHES actually make it to the respective handler. Note that the hook can (and should) be used irrespective of whether we are running in Dom0, as accessing port 0x61 in a DomU would be even worse, while the shared info field would just hold zero all the time. Note further that hardware NMI handling for PVH doesn't currently work anyway due to missing code in the hypervisor (but it is expected to work the native rather than the PV way). Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/enlighten.c | 22 ++++++++++++++++++- include/xen/interface/nmi.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 include/xen/interface/nmi.h (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fac5e4f9607c..115016347806 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -1357,6 +1359,21 @@ static const struct machine_ops xen_machine_ops __initconst = { .emergency_restart = xen_emergency_restart, }; +static unsigned char xen_get_nmi_reason(void) +{ + unsigned char reason = 0; + + /* Construct a value which looks like it came from port 0x61.
*/ + if (test_bit(_XEN_NMIREASON_io_error, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_IOCHK; + if (test_bit(_XEN_NMIREASON_pci_serr, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_SERR; + + return reason; +} + static void __init xen_boot_params_init_edd(void) { #if IS_ENABLED(CONFIG_EDD) @@ -1541,9 +1558,12 @@ asmlinkage __visible void __init xen_start_kernel(void) pv_info = xen_info; pv_init_ops = xen_init_ops; pv_apic_ops = xen_apic_ops; - if (!xen_pvh_domain()) + if (!xen_pvh_domain()) { pv_cpu_ops = xen_cpu_ops; + x86_platform.get_nmi_reason = xen_get_nmi_reason; + } + if (xen_feature(XENFEAT_auto_translated_physmap)) x86_init.resources.memory_setup = xen_auto_xlated_memory_setup; else diff --git a/include/xen/interface/nmi.h b/include/xen/interface/nmi.h new file mode 100644 index 000000000000..b47d9d06fade --- /dev/null +++ b/include/xen/interface/nmi.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +#include + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* PCI SERR reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_pci_serr 1 +#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr) + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. + */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +DEFINE_GUEST_HANDLE_STRUCT(xennmi_callback); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. + */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ -- cgit v1.2.3 From 3d125f9c91c599a77ac3cb8f05113a6c8df99cbe Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 14 Jan 2015 00:20:21 +0100 Subject: net: fec: fix MDIO bus assignment for dual fec SoCs On i.MX28, the MDIO bus is shared between the two FEC instances. The driver makes sure that the second FEC uses the MDIO bus of the first FEC. This is done conditionally if FEC_QUIRK_ENET_MAC is set. However, in newer designs, such as Vybrid or i.MX6SX, each FEC MAC has its own MDIO bus. Simply removing the quirk FEC_QUIRK_ENET_MAC is not an option since other logic, triggered by this quirk, is still needed. Furthermore, there are board designs which use the same MDIO bus for both PHYs even though the second bus would be available on the SoC side. Such layouts are popular since they save pins on the SoC side. Due to the above quirk, those boards currently do work fine.
The boards in the mainline tree with such a layout are: - Freescale Vybrid Tower with TWR-SER2 (vf610-twr.dts) - Freescale i.MX6 SoloX SDB Board (imx6sx-sdb.dts) This patch adds a new quirk FEC_QUIRK_SINGLE_MDIO for i.MX28, which makes sure that the MDIO bus of the first FEC is used in any case. However, the boards above do have a SoC with an MDIO bus for each FEC instance. But the PHYs are not connected in a 1:1 configuration. A proper device tree description is needed to allow the driver to figure out where to find its PHY. This patch fixes that shortcoming by adding an MDIO bus child node to the first FEC instance, along with the two PHYs on that bus, and making use of the phy-handle property to add a reference to the PHYs. Acked-by: Sascha Hauer Signed-off-by: Stefan Agner Signed-off-by: David S. Miller --- arch/arm/boot/dts/imx6sx-sdb.dts | 15 +++++++++++++++ arch/arm/boot/dts/vf610-twr.dts | 15 +++++++++++++++ drivers/net/ethernet/freescale/fec.h | 2 ++ drivers/net/ethernet/freescale/fec_main.c | 9 +++++---- 4 files changed, 37 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index 1e6e5cc1c14c..8c1febd7e3f2 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -159,13 +159,28 @@ pinctrl-0 = <&pinctrl_enet1>; phy-supply = <&reg_enet_3v3>; phy-mode = "rgmii"; + phy-handle = <&ethphy1>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy1: ethernet-phy@0 { + reg = <0>; + }; + + ethphy2: ethernet-phy@1 { + reg = <1>; + }; + }; }; &fec2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet2>; phy-mode = "rgmii"; + phy-handle = <&ethphy2>; status = "okay"; }; diff --git a/arch/arm/boot/dts/vf610-twr.dts b/arch/arm/boot/dts/vf610-twr.dts index a0f762159cb2..f2b64b1b00fa 100644 --- a/arch/arm/boot/dts/vf610-twr.dts +++ b/arch/arm/boot/dts/vf610-twr.dts @@ -129,13 +129,28 @@ &fec0 { phy-mode = "rmii"; + phy-handle = <&ethphy0>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_fec0>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy0: ethernet-phy@0 { + reg = <0>; + }; + + ethphy1: ethernet-phy@1 { + reg = <1>; + }; + }; }; &fec1 { phy-mode = "rmii"; + phy-handle = <&ethphy1>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_fec1>; status = "okay"; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 469691ad4a1e..40132929daf7 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -424,6 +424,8 @@ struct bufdesc_ex { * (40ns * 6). */ #define FEC_QUIRK_BUG_CAPTURE (1 << 10) +/* Controller has only one MDIO bus */ +#define FEC_QUIRK_SINGLE_MDIO (1 << 11) struct fec_enet_priv_tx_q { int index; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index dbcab1cecf68..bba87775419d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -91,7 +91,8 @@ static struct platform_device_id fec_devtype[] = { .driver_data = 0, }, { .name = "imx28-fec", - .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, + .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | + FEC_QUIRK_SINGLE_MDIO, }, { .name = "imx6q-fec", .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | @@ -1937,7 +1938,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) int err = -ENXIO, i; /* - * The dual fec interfaces are not equivalent with enet-mac.
+ * The i.MX28 dual fec interfaces are not equal. * Here are the differences: * * - fec0 supports MII & RMII modes while fec1 only supports RMII @@ -1952,7 +1953,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) * mdio interface in board design, and need to be configured by * fec0 mii_bus. */ - if ((fep->quirks & FEC_QUIRK_ENET_MAC) && fep->dev_id > 0) { + if ((fep->quirks & FEC_QUIRK_SINGLE_MDIO) && fep->dev_id > 0) { /* fec1 uses fec0 mii_bus */ if (mii_cnt && fec0_mii_bus) { fep->mii_bus = fec0_mii_bus; @@ -2015,7 +2016,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) mii_cnt++; /* save fec0 mii_bus */ - if (fep->quirks & FEC_QUIRK_ENET_MAC) + if (fep->quirks & FEC_QUIRK_SINGLE_MDIO) fec0_mii_bus = fep->mii_bus; return 0; -- cgit v1.2.3
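As a closing illustration of the quirk split (FEC_QUIRK_SINGLE_MDIO matches the value added in the diff above; FEC_QUIRK_ENET_MAC is a placeholder here), the shared-bus behavior now keys off its own flag, so SoCs with one MDIO bus per MAC keep independent buses while i.MX28 retains the shared one:

	#include <stdbool.h>
	#include <stdio.h>

	#define FEC_QUIRK_ENET_MAC	(1 << 0)	/* placeholder value */
	#define FEC_QUIRK_SINGLE_MDIO	(1 << 11)	/* as defined in the diff above */

	/* Does this FEC instance borrow fec0's MDIO bus? */
	static bool uses_fec0_mii_bus(unsigned int quirks, int dev_id)
	{
		return (quirks & FEC_QUIRK_SINGLE_MDIO) && dev_id > 0;
	}

	int main(void)
	{
		unsigned int imx28 = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SINGLE_MDIO;
		unsigned int imx6sx = FEC_QUIRK_ENET_MAC;

		printf("i.MX28 fec1 shares fec0's bus: %d\n", uses_fec0_mii_bus(imx28, 1));	/* 1 */
		printf("i.MX6SX fec2 shares fec0's bus: %d\n", uses_fec0_mii_bus(imx6sx, 1));	/* 0 */
		return 0;
	}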