From d0c31e02005764dae0aab130a57e9794d06b824d Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 24 Mar 2016 13:02:22 -0700 Subject: sparc/PCI: Fix for panic while enabling SR-IOV We noticed this panic while enabling SR-IOV in sparc. mlx4_core: Mellanox ConnectX core driver v2.2-1 (Jan 1 2015) mlx4_core: Initializing 0007:01:00.0 mlx4_core 0007:01:00.0: Enabling SR-IOV with 5 VFs mlx4_core: Initializing 0007:01:00.1 Unable to handle kernel NULL pointer dereference insmod(10010): Oops [#1] CPU: 391 PID: 10010 Comm: insmod Not tainted 4.1.12-32.el6uek.kdump2.sparc64 #1 TPC: I7: <__mlx4_init_one+0x324/0x500 [mlx4_core]> Call Trace: [00000000104c5ea4] __mlx4_init_one+0x324/0x500 [mlx4_core] [00000000104c613c] mlx4_init_one+0xbc/0x120 [mlx4_core] [0000000000725f14] local_pci_probe+0x34/0xa0 [0000000000726028] pci_call_probe+0xa8/0xe0 [0000000000726310] pci_device_probe+0x50/0x80 [000000000079f700] really_probe+0x140/0x420 [000000000079fa24] driver_probe_device+0x44/0xa0 [000000000079fb5c] __device_attach+0x3c/0x60 [000000000079d85c] bus_for_each_drv+0x5c/0xa0 [000000000079f588] device_attach+0x88/0xc0 [000000000071acd0] pci_bus_add_device+0x30/0x80 [0000000000736090] virtfn_add.clone.1+0x210/0x360 [00000000007364a4] sriov_enable+0x2c4/0x520 [000000000073672c] pci_enable_sriov+0x2c/0x40 [00000000104c2d58] mlx4_enable_sriov+0xf8/0x180 [mlx4_core] [00000000104c49ac] mlx4_load_one+0x42c/0xd40 [mlx4_core] Disabling lock debugging due to kernel taint Caller[00000000104c5ea4]: __mlx4_init_one+0x324/0x500 [mlx4_core] Caller[00000000104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[0000000000725f14]: local_pci_probe+0x34/0xa0 Caller[0000000000726028]: pci_call_probe+0xa8/0xe0 Caller[0000000000726310]: pci_device_probe+0x50/0x80 Caller[000000000079f700]: really_probe+0x140/0x420 Caller[000000000079fa24]: driver_probe_device+0x44/0xa0 Caller[000000000079fb5c]: __device_attach+0x3c/0x60 Caller[000000000079d85c]: bus_for_each_drv+0x5c/0xa0 Caller[000000000079f588]: device_attach+0x88/0xc0 Caller[000000000071acd0]: pci_bus_add_device+0x30/0x80 Caller[0000000000736090]: virtfn_add.clone.1+0x210/0x360 Caller[00000000007364a4]: sriov_enable+0x2c4/0x520 Caller[000000000073672c]: pci_enable_sriov+0x2c/0x40 Caller[00000000104c2d58]: mlx4_enable_sriov+0xf8/0x180 [mlx4_core] Caller[00000000104c49ac]: mlx4_load_one+0x42c/0xd40 [mlx4_core] Caller[00000000104c5f90]: __mlx4_init_one+0x410/0x500 [mlx4_core] Caller[00000000104c613c]: mlx4_init_one+0xbc/0x120 [mlx4_core] Caller[0000000000725f14]: local_pci_probe+0x34/0xa0 Caller[0000000000726028]: pci_call_probe+0xa8/0xe0 Caller[0000000000726310]: pci_device_probe+0x50/0x80 Caller[000000000079f700]: really_probe+0x140/0x420 Caller[000000000079fa24]: driver_probe_device+0x44/0xa0 Caller[000000000079fb08]: __driver_attach+0x88/0xa0 Caller[000000000079d90c]: bus_for_each_dev+0x6c/0xa0 Caller[000000000079f29c]: driver_attach+0x1c/0x40 Caller[000000000079e35c]: bus_add_driver+0x17c/0x220 Caller[00000000007a02d4]: driver_register+0x74/0x120 Caller[00000000007263fc]: __pci_register_driver+0x3c/0x60 Caller[00000000104f62bc]: mlx4_init+0x60/0xcc [mlx4_core] Kernel panic - not syncing: Fatal exception Press Stop-A (L1-A) to return to the boot prom ---[ end Kernel panic - not syncing: Fatal exception Details: Here is the call sequence virtfn_add->__mlx4_init_one->dma_set_mask->dma_supported The panic happened at line 760(file arch/sparc/kernel/iommu.c) 758 int dma_supported(struct device *dev, u64 device_mask) 759 { 760 struct iommu *iommu = dev->archdata.iommu; 761 u64 dma_addr_mask = iommu->dma_addr_mask; 762 763 if (device_mask >= (1UL << 32UL)) 764 return 0; 765 766 if ((device_mask & dma_addr_mask) == dma_addr_mask) 767 return 1; 768 769 #ifdef CONFIG_PCI 770 if (dev_is_pci(dev)) 771 return pci64_dma_supported(to_pci_dev(dev), device_mask); 772 #endif 773 774 return 0; 775 } 776 EXPORT_SYMBOL(dma_supported); Same panic happened with Intel ixgbe driver also. SR-IOV code looks for arch specific data while enabling VFs. When VF device is added, driver probe function makes set of calls to initialize the pci device. Because the VF device is added different way than the normal PF device(which happens via of_create_pci_dev for sparc), some of the arch specific initialization does not happen for VF device. That causes panic when archdata is accessed. To fix this, I have used already defined weak function pcibios_setup_device to copy archdata from PF to VF. Also verified the fix. Signed-off-by: Babu Moger Signed-off-by: Sowmini Varadhan Reviewed-by: Ethan Zhao Signed-off-by: David S. Miller --- arch/sparc/kernel/pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index badf0951d73c..9f9614df9e1e 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -994,6 +994,23 @@ void pcibios_set_master(struct pci_dev *dev) /* No special bus mastering setup handling */ } +#ifdef CONFIG_PCI_IOV +int pcibios_add_device(struct pci_dev *dev) +{ + struct pci_dev *pdev; + + /* Add sriov arch specific initialization here. + * Copy dev_archdata from PF to VF + */ + if (dev->is_virtfn) { + pdev = dev->physfn; + memcpy(&dev->dev.archdata, &pdev->dev.archdata, + sizeof(struct dev_archdata)); + } + return 0; +} +#endif /* CONFIG_PCI_IOV */ + static int __init pcibios_init(void) { pci_dfl_cache_line_size = 64 >> 2; -- cgit v1.2.3 From 5ec712934ce1ff6e33bf3878034a91ca9bd600c9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 29 Mar 2016 18:39:26 -0700 Subject: sparc: Write up preadv2/pwritev2 syscalls. Signed-off-by: David S. Miller --- arch/sparc/include/uapi/asm/unistd.h | 4 +++- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index b6de8b10a55b..36eee8132c22 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -423,8 +423,10 @@ #define __NR_setsockopt 355 #define __NR_mlock2 356 #define __NR_copy_file_range 357 +#define __NR_preadv2 358 +#define __NR_pwritev2 359 -#define NR_syscalls 358 +#define NR_syscalls 360 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 6c3dd6c52f8b..eac7f0db5c8c 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -88,4 +88,4 @@ sys_call_table: /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen -/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range +/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 12b524cfcfa0..b0f17ff2ddba 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -89,7 +89,7 @@ sys_call_table32: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range + .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 #endif /* CONFIG_COMPAT */ @@ -170,4 +170,4 @@ sys_call_table: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word sys_setsockopt, sys_mlock2, sys_copy_file_range + .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -- cgit v1.2.3 From dba599091c191d209b1499511a524ad9657c0e5a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 15 Apr 2016 09:41:35 +0200 Subject: s390/pci: fix use after free in dma_init After a failure during registration of the dma_table (because of the function being in error state) we free its memory but don't reset the associated pointer to zero. When we then receive a notification from firmware (about the function being in error state) we'll try to walk and free the dma_table again. Fix this by resetting the dma_table pointer. In addition to that make sure that we free the iommu_bitmap when appropriate. Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index e595e89eac65..1ea8c07eab84 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -457,7 +457,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->dma_table = dma_alloc_cpu_table(); if (!zdev->dma_table) { rc = -ENOMEM; - goto out_clean; + goto out; } /* @@ -477,18 +477,22 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; - goto out_reg; + goto free_dma_table; } rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, (u64) zdev->dma_table); if (rc) - goto out_reg; - return 0; + goto free_bitmap; -out_reg: + return 0; +free_bitmap: + vfree(zdev->iommu_bitmap); + zdev->iommu_bitmap = NULL; +free_dma_table: dma_free_cpu_table(zdev->dma_table); -out_clean: + zdev->dma_table = NULL; +out: return rc; } -- cgit v1.2.3 From 723cacbd9dc79582e562c123a0bacf8bfc69e72a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 15 Apr 2016 16:38:40 +0200 Subject: s390/mm: fix asce_bits handling with dynamic pagetable levels There is a race with multi-threaded applications between context switch and pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and mm->context.asce_bits, w/o holding any locks. A concurrent mmap with a pagetable upgrade on another thread in crst_table_upgrade() could already have set new asce_bits, but not yet the new mm->pgd. This would result in a corrupt user_asce in switch_mm(), and eventually in a kernel panic from a translation exception. Fix this by storing the complete asce instead of just the asce_bits, which can then be read atomically from switch_mm(), so that it either sees the old value or the new value, but no mixture. Both cases are OK. Having the old value would result in a page fault on access to the higher level memory, but the fault handler would see the new mm->pgd, if it was a valid access after the mmap on the other thread has completed. So as worst-case scenario we would have a page fault loop for the racing thread until the next time slice. Also remove dead code and simplify the upgrade/downgrade path, there are no upgrades from 2 levels, and only downgrades from 3 levels for compat tasks. There are also no concurrent upgrades, because the mmap_sem is held with down_write() in do_mmap, so the flush and table checks during upgrade can be removed. Reported-by: Michael Munday Reviewed-by: Martin Schwidefsky Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu.h | 2 +- arch/s390/include/asm/mmu_context.h | 28 +++++++++--- arch/s390/include/asm/pgalloc.h | 4 +- arch/s390/include/asm/processor.h | 2 +- arch/s390/include/asm/tlbflush.h | 9 ++-- arch/s390/mm/init.c | 3 +- arch/s390/mm/mmap.c | 6 +-- arch/s390/mm/pgalloc.c | 85 ++++++++++++------------------------- 8 files changed, 62 insertions(+), 77 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index d29ad9545b41..081b2ad99d73 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -11,7 +11,7 @@ typedef struct { spinlock_t list_lock; struct list_head pgtable_list; struct list_head gmap_list; - unsigned long asce_bits; + unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; /* The mmu context allocates 4K page tables. */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d321469eeda7..c837b79b455d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - if (mm->context.asce_limit == 0) { + switch (mm->context.asce_limit) { + case 1UL << 42: + /* + * forked 3-level task, fall through to set new asce with new + * mm->pgd + */ + case 0: /* context created by exec, set asce limit to 4TB */ - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | _ASCE_TYPE_REGION3; mm->context.asce_limit = STACK_TOP_MAX; - } else if (mm->context.asce_limit == (1UL << 31)) { + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + break; + case 1UL << 53: + /* forked 4-level task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + break; + case 1UL << 31: + /* forked 2-level compat task, set new asce with new mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + /* pgd_alloc() did not increase mm->nr_pmds */ mm_inc_nr_pmds(mm); } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); @@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk, static inline void set_user_asce(struct mm_struct *mm) { - S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd); + S390_lowcore.user_asce = mm->context.asce; if (current->thread.mm_segment.ar4) __ctl_load(S390_lowcore.user_asce, 7, 7); set_cpu_flag(CIF_ASCE); @@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); + S390_lowcore.user_asce = next->context.asce; if (prev == next) return; if (MACHINE_HAS_TLB_LC) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 9b3d9b6099f2..da34cb6b1f3b 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -52,8 +52,8 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) return _REGION2_ENTRY_EMPTY; } -int crst_table_upgrade(struct mm_struct *, unsigned long limit); -void crst_table_downgrade(struct mm_struct *, unsigned long limit); +int crst_table_upgrade(struct mm_struct *); +void crst_table_downgrade(struct mm_struct *); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d6fd22ea270d..18cdede1aeda 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -175,7 +175,7 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS]; regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \ regs->psw.addr = new_psw; \ regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ + crst_table_downgrade(current->mm); \ execve_tail(); \ } while (0) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index ca148f7c3eaa..a2e6ef32e054 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -110,8 +110,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_IDTE) - __tlb_flush_idte((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte(init_mm.context.asce); else __tlb_flush_global(); } @@ -133,8 +132,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce) static inline void __tlb_flush_kernel(void) { if (MACHINE_HAS_TLB_LC) - __tlb_flush_idte_local((unsigned long) init_mm.pgd | - init_mm.context.asce_bits); + __tlb_flush_idte_local(init_mm.context.asce); else __tlb_flush_local(); } @@ -148,8 +146,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * only ran on the local cpu. */ if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list)) - __tlb_flush_asce(mm, (unsigned long) mm->pgd | - mm->context.asce_bits); + __tlb_flush_asce(mm, mm->context.asce); else __tlb_flush_full(mm); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index c7b0451397d6..2489b2e917c8 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -89,7 +89,8 @@ void __init paging_init(void) asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; pgd_type = _REGION3_ENTRY_EMPTY; } - S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; + S390_lowcore.kernel_asce = init_mm.context.asce; clear_table((unsigned long *) init_mm.pgd, pgd_type, sizeof(unsigned long)*2048); vmem_map_init(); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 45c4daa49930..89cf09e5f168 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -174,7 +174,7 @@ int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) if (!(flags & MAP_FIXED)) addr = 0; if ((addr + len) >= TASK_SIZE) - return crst_table_upgrade(current->mm, TASK_MAX_SIZE); + return crst_table_upgrade(current->mm); return 0; } @@ -191,7 +191,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr, return area; if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, TASK_MAX_SIZE); + rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area(filp, addr, len, pgoff, flags); @@ -213,7 +213,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr, return area; if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < TASK_MAX_SIZE) { /* Upgrade the page table to 4 levels and retry. */ - rc = crst_table_upgrade(mm, TASK_MAX_SIZE); + rc = crst_table_upgrade(mm); if (rc) return (unsigned long) rc; area = arch_get_unmapped_area_topdown(filp, addr, len, diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index f6c3de26cda8..e8b5962ac12a 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -76,81 +76,52 @@ static void __crst_table_upgrade(void *arg) __tlb_flush_local(); } -int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +int crst_table_upgrade(struct mm_struct *mm) { unsigned long *table, *pgd; - unsigned long entry; - int flush; - BUG_ON(limit > TASK_MAX_SIZE); - flush = 0; -repeat: + /* upgrade should only happen from 3 to 4 levels */ + BUG_ON(mm->context.asce_limit != (1UL << 42)); + table = crst_table_alloc(mm); if (!table) return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); - if (mm->context.asce_limit < limit) { - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit <= (1UL << 31)) { - entry = _REGION3_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - } else { - entry = _REGION2_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 53; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION2; - } - crst_table_init(table, entry); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->task_size = mm->context.asce_limit; - table = NULL; - flush = 1; - } + pgd = (unsigned long *) mm->pgd; + crst_table_init(table, _REGION2_ENTRY_EMPTY); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->context.asce_limit = 1UL << 53; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION2; + mm->task_size = mm->context.asce_limit; spin_unlock_bh(&mm->page_table_lock); - if (table) - crst_table_free(mm, table); - if (mm->context.asce_limit < limit) - goto repeat; - if (flush) - on_each_cpu(__crst_table_upgrade, mm, 0); + + on_each_cpu(__crst_table_upgrade, mm, 0); return 0; } -void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +void crst_table_downgrade(struct mm_struct *mm) { pgd_t *pgd; + /* downgrade should only happen from 3 to 2 levels (compat only) */ + BUG_ON(mm->context.asce_limit != (1UL << 42)); + if (current->active_mm == mm) { clear_user_asce(); __tlb_flush_mm(mm); } - while (mm->context.asce_limit > limit) { - pgd = mm->pgd; - switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { - case _REGION_ENTRY_TYPE_R2: - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - break; - case _REGION_ENTRY_TYPE_R3: - mm->context.asce_limit = 1UL << 31; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_SEGMENT; - break; - default: - BUG(); - } - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->task_size = mm->context.asce_limit; - crst_table_free(mm, (unsigned long *) pgd); - } + + pgd = mm->pgd; + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->context.asce_limit = 1UL << 31; + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT; + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + if (current->active_mm == mm) set_user_asce(mm); } -- cgit v1.2.3 From a95f94bc98754bad32cb7f36c867d71f3f2be729 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 30 Mar 2016 13:42:47 +0200 Subject: sparc/defconfigs: Remove CONFIG_IPV6_PRIVACY Option is long gone, see 5d9efa7ee99e ("ipv6: Remove privacy config option.") Signed-off-by: Borislav Petkov Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller --- arch/sparc/configs/sparc32_defconfig | 1 - arch/sparc/configs/sparc64_defconfig | 1 - 2 files changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index fb23fd6b186a..c74d3701ad68 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -24,7 +24,6 @@ CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y # CONFIG_INET_LRO is not set -CONFIG_IPV6_PRIVACY=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 04920ab8e292..3583d676a916 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -48,7 +48,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y -- cgit v1.2.3 From 9a78d4fc28904785ffe4c2d361e25b251b479704 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 11 Apr 2016 17:57:05 -0700 Subject: sparc/pci: Refactor dev_archdata initialization into pci_init_dev_archdata The function pcibios_add_device() added by commit d0c31e020057 ("sparc/PCI: Fix for panic while enabling SR-IOV") initializes the dev_archdata by doing a memcpy from the PF. This has the problem that it erroneously copies the OF device without explicitly refcounting it. As David Miller pointed out: "Generally speaking we don't really support hot-plug for OF probed devices, but if we did all of the device tree pointers have to be refcounted properly." To fix this error, and also avoid code duplication, this patch creates a new helper function, pci_init_dev_archdata(), that initializes the fields in dev_archdata, and can be invoked by callers after they have taken the needed refcounts Signed-off-by: Sowmini Varadhan Tested-by: Babu Moger Reviewed-by: Khalid Aziz Signed-off-by: David S. Miller --- arch/sparc/kernel/pci.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9f9614df9e1e..c2b202d763a1 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -245,6 +245,18 @@ static void pci_parse_of_addrs(struct platform_device *op, } } +static void pci_init_dev_archdata(struct dev_archdata *sd, void *iommu, + void *stc, void *host_controller, + struct platform_device *op, + int numa_node) +{ + sd->iommu = iommu; + sd->stc = stc; + sd->host_controller = host_controller; + sd->op = op; + sd->numa_node = numa_node; +} + static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, struct device_node *node, struct pci_bus *bus, int devfn) @@ -259,13 +271,10 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, if (!dev) return NULL; + op = of_find_device_by_node(node); sd = &dev->dev.archdata; - sd->iommu = pbm->iommu; - sd->stc = &pbm->stc; - sd->host_controller = pbm; - sd->op = op = of_find_device_by_node(node); - sd->numa_node = pbm->numa_node; - + pci_init_dev_archdata(sd, pbm->iommu, &pbm->stc, pbm, op, + pbm->numa_node); sd = &op->dev.archdata; sd->iommu = pbm->iommu; sd->stc = &pbm->stc; @@ -1003,9 +1012,13 @@ int pcibios_add_device(struct pci_dev *dev) * Copy dev_archdata from PF to VF */ if (dev->is_virtfn) { + struct dev_archdata *psd; + pdev = dev->physfn; - memcpy(&dev->dev.archdata, &pdev->dev.archdata, - sizeof(struct dev_archdata)); + psd = &pdev->dev.archdata; + pci_init_dev_archdata(&dev->dev.archdata, psd->iommu, + psd->stc, psd->host_controller, NULL, + psd->numa_node); } return 0; } -- cgit v1.2.3 From 36128d204b81c099b5779771127a5546eac549c9 Mon Sep 17 00:00:00 2001 From: Adrian Glaubitz Date: Thu, 14 Apr 2016 20:14:41 +0200 Subject: sparc: Implement and wire up modalias_show for vio. Signed-off-by: John Paul Adrian Glaubitz Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/vio.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index cb5789c9f961..d7055609a41c 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -105,9 +105,18 @@ static ssize_t type_show(struct device *dev, return sprintf(buf, "%s\n", vdev->type); } +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct vio_dev *vdev = to_vio_dev(dev); + + return sprintf(buf, "vio:T%sS%s\n", vdev->type, vdev->compat); +} + static struct device_attribute vio_dev_attrs[] = { __ATTR_RO(devspec), __ATTR_RO(type), + __ATTR_RO(modalias), __ATTR_NULL }; -- cgit v1.2.3 From 5bde2c9be701c4583f0a9243bd46590ec401bfba Mon Sep 17 00:00:00 2001 From: Adrian Glaubitz Date: Thu, 14 Apr 2016 20:14:42 +0200 Subject: sparc: Implement and wire up vio_hotplug for vio. Signed-off-by: John Paul Adrian Glaubitz Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/vio.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index d7055609a41c..f6bb857254fc 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -45,6 +45,14 @@ static const struct vio_device_id *vio_match_device( return NULL; } +static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + + add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, vio_dev->compat); + return 0; +} + static int vio_bus_match(struct device *dev, struct device_driver *drv) { struct vio_dev *vio_dev = to_vio_dev(dev); @@ -123,6 +131,7 @@ static struct device_attribute vio_dev_attrs[] = { static struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, + .uevent = vio_hotplug, .match = vio_bus_match, .probe = vio_device_probe, .remove = vio_device_remove, -- cgit v1.2.3 From c5b8b5beee1e2aadef0409dba555b7085d57910d Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Tue, 19 Apr 2016 11:12:54 -0600 Subject: sparc64: recognize and support Sonoma CPU type Add code to recognize SPARC-Sonoma cpu correctly and update cpu hardware caps and cpu distribution map. SPARC-Sonoma is based upon SPARC-M7 core along with additional PCI functions added on and is reported by firmware as "SPARC-SN". Signed-off-by: Khalid Aziz Acked-by: Allen Pais Signed-off-by: David S. Miller --- arch/sparc/include/asm/spitfire.h | 1 + arch/sparc/kernel/cpu.c | 6 ++++++ arch/sparc/kernel/cpumap.c | 1 + arch/sparc/kernel/head_64.S | 8 ++++++++ arch/sparc/kernel/setup_64.c | 7 ++++++- arch/sparc/mm/init_64.c | 3 +++ 6 files changed, 25 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 56f933816144..1d8321c827a8 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -48,6 +48,7 @@ #define SUN4V_CHIP_SPARC_M6 0x06 #define SUN4V_CHIP_SPARC_M7 0x07 #define SUN4V_CHIP_SPARC64X 0x8a +#define SUN4V_CHIP_SPARC_SN 0x8b #define SUN4V_CHIP_UNKNOWN 0xff #ifndef __ASSEMBLY__ diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index dfad8b1aea9f..493e023a468a 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "sparc-m7"; break; + case SUN4V_CHIP_SPARC_SN: + sparc_cpu_type = "SPARC-SN"; + sparc_fpu_type = "SPARC-SN integrated FPU"; + sparc_pmu_type = "sparc-sn"; + break; + case SUN4V_CHIP_SPARC64X: sparc_cpu_type = "SPARC64-X"; sparc_fpu_type = "SPARC64-X integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index e69ec0e3f155..45c820e1cba5 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA5: case SUN4V_CHIP_SPARC_M6: case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_SN: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; break; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index cd1f592cd347..5b4f5c363674 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -414,6 +414,8 @@ sun4v_chip_type: cmp %g2, 'T' be,pt %xcc, 70f cmp %g2, 'M' + be,pt %xcc, 70f + cmp %g2, 'S' bne,pn %xcc, 49f nop @@ -433,6 +435,9 @@ sun4v_chip_type: cmp %g2, '7' be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M7, %g4 + cmp %g2, 'N' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_SN, %g4 ba,pt %xcc, 49f nop @@ -595,6 +600,9 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_SN be,pt %xcc, niagara4_patch nop diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 26db95b54ee9..599f1207eed2 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -285,7 +285,8 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); - if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7) + if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_SN) sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, &__sun_m7_2insn_patch_end); @@ -524,6 +525,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || @@ -532,6 +534,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; } @@ -561,6 +564,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | @@ -570,6 +574,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 1cfe6aab7a11..09e838801e39 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1769,6 +1769,7 @@ static void __init setup_page_offset(void) max_phys_bits = 47; break; case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_SN: default: /* M7 and later support 52-bit virtual addresses. */ sparc64_va_hole_top = 0xfff8000000000000UL; @@ -1986,6 +1987,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_SN: pagecv_flag = 0x00; break; default: @@ -2138,6 +2140,7 @@ void __init paging_init(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_SN: page_cache4v_flag = _PAGE_CP_4V; break; default: -- cgit v1.2.3 From d9676fa152c83b82137af950b1d4f629045d90c9 Mon Sep 17 00:00:00 2001 From: Evgeny Voevodin Date: Wed, 23 Mar 2016 12:26:52 +0300 Subject: ARCv2: Enable LOCKDEP - The asm helpers for calling into irq tracer were missing - Add calls to above helpers in low level assembly entry code for ARCv2 - irq_save() uses CLRI to disable interrupts and returns the prev interrupt state (in STATUS32) in a specific encoding (and not the raw value of STATUS32). This is usable with SETI in irq_restore(). However save_flags() reads the raw value of STATUS32 which doesn't pair with irq_save/restore() and thus needs fixing. Signed-off-by: Evgeny Voevodin [vgupta: updated changelog and also added some comments] Signed-off-by: Vineet Gupta --- arch/arc/include/asm/irqflags-arcv2.h | 36 ++++++++++++++++++++++++++++++++++- arch/arc/kernel/entry-arcv2.S | 10 +++++++++- arch/arc/kernel/entry-compact.S | 3 +++ 3 files changed, 47 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index 37c2f751eebf..d1ec7f6b31e0 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -18,6 +18,12 @@ #define STATUS_AD_MASK (1< Date: Fri, 22 Apr 2016 13:05:31 +0100 Subject: xen/qspinlock: Don't kick CPU if IRQ is not initialized The following commit: 1fb3a8b2cfb2 ("xen/spinlock: Fix locking path engaging too soon under PVHVM.") ... moved the initalization of the kicker interrupt until after native_cpu_up() is called. However, when using qspinlocks, a CPU may try to kick another CPU that is spinning (because it has not yet initialized its kicker interrupt), resulting in the following crash during boot: kernel BUG at /build/linux-Ay7j_C/linux-4.4.0/drivers/xen/events/events_base.c:1210! invalid opcode: 0000 [#1] SMP ... RIP: 0010:[] [] xen_send_IPI_one+0x59/0x60 ... Call Trace: [] xen_qlock_kick+0xe/0x10 [] __pv_queued_spin_unlock+0xb2/0xf0 [] ? __raw_callee_save___pv_queued_spin_unlock+0x11/0x20 [] ? check_tsc_warp+0x76/0x150 [] check_tsc_sync_source+0x96/0x160 [] native_cpu_up+0x3d8/0x9f0 [] xen_hvm_cpu_up+0x35/0x80 [] _cpu_up+0x13c/0x180 [] cpu_up+0x7a/0xa0 [] smp_init+0x7f/0x81 [] kernel_init_freeable+0xef/0x212 [] ? rest_init+0x80/0x80 [] kernel_init+0xe/0xe0 [] ret_from_fork+0x3f/0x70 [] ? rest_init+0x80/0x80 To fix this, only send the kick if the target CPU's interrupt has been initialized. This check isn't racy, because the target is waiting for the spinlock, so it won't have initialized the interrupt in the meantime. Signed-off-by: Ross Lagerwall Reviewed-by: Boris Ostrovsky Cc: David Vrabel Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: Ingo Molnar --- arch/x86/xen/spinlock.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 9e2ba5c6e1dd..f42e78de1e10 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -27,6 +27,12 @@ static bool xen_pvspin = true; static void xen_qlock_kick(int cpu) { + int irq = per_cpu(lock_kicker_irq, cpu); + + /* Don't kick if the target's kicker interrupt is not initialized. */ + if (irq == -1) + return; + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); } -- cgit v1.2.3 From b89c173788c3a8ed571652c203bf59a0e9d700aa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Apr 2016 13:25:33 -0700 Subject: perf/x86/intel: Add model number for Skylake Server to perf Everything the same as base Skylake, just a new model number. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1460751933-2264-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 68fa55b4d42e..aff79884e17d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3639,6 +3639,7 @@ __init int intel_pmu_init(void) case 78: /* 14nm Skylake Mobile */ case 94: /* 14nm Skylake Desktop */ + case 85: /* 14nm Skylake Server */ x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); -- cgit v1.2.3 From e1089602a3bf3efd13d0ffc575f3e22213f009da Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 17 Apr 2016 08:43:29 -0700 Subject: perf/x86/intel/rapl: Add missing Haswell model Added one missing Haswell model. Signed-off-by: Srinivas Pandruvada Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@alien8.de Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1460907809-11897-1-git-send-email-srinivas.pandruvada@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 70c93f9b03ac..1705c9d75e44 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -718,6 +718,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 70: /* Haswell GT3e */ case 61: /* Broadwell */ case 71: /* Broadwell-H */ rapl_cntr_mask = RAPL_IDX_HSW; -- cgit v1.2.3 From e16d8a6cbb499c5c8bfe9330d3351b649bded4af Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Apr 2016 08:52:44 -0700 Subject: Revert "x86/mm/32: Set NX in __supported_pte_mask before enabling paging" This reverts commit 320d25b6a05f8b73c23fc21025d2906ecdd2d4fc. This change was problematic for a couple of reasons: 1. It missed a some entry points (Xen things and 64-bit native). 2. The entry it changed can be executed more than once. This isn't really a problem, but it conflated per-cpu state setup and global state setup. 3. It broke 64-bit non-NX. 64-bit non-NX worked the other way around from 32-bit -- __supported_pte_mask had NX set initially and was *cleared* in x86_configure_nx. With the patch applied, it never got cleared. Reported-and-tested-by: Meelis Roos Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/59bd15f7f4b56b633a611b7f70876c6d2ad01a98.1461685884.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 6 ------ arch/x86/mm/setup_nx.c | 5 +++-- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 54cdbd2003fe..af1112980dd4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -389,12 +389,6 @@ default_entry: /* Make changes effective */ wrmsr - /* - * And make sure that all the mappings we set up have NX set from - * the beginning. - */ - orl $(1 << (_PAGE_BIT_NX - 32)), pa(__supported_pte_mask + 4) - enable_paging: /* diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 8bea84724a7d..f65a33f505b6 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -32,8 +32,9 @@ early_param("noexec", noexec_setup); void x86_configure_nx(void) { - /* If disable_nx is set, clear NX on all new mappings going forward. */ - if (disable_nx) + if (boot_cpu_has(X86_FEATURE_NX) && !disable_nx) + __supported_pte_mask |= _PAGE_NX; + else __supported_pte_mask &= ~_PAGE_NX; } -- cgit v1.2.3 From d701cca6744fe0d67c86346dcfc9b128b17b5045 Mon Sep 17 00:00:00 2001 From: Rui Salvaterra Date: Tue, 19 Apr 2016 13:23:36 +0100 Subject: powerpc: wire up preadv2 and pwritev2 syscalls Wire up preadv2/pwritev2 in the same way as preadv/pwritev. Fixes two build warnings on ppc64. mpe: Lightly tested with fio (slightly hacked to add the syscall wrappers): fio-4217 [009] .... 1304.635300: sys_preadv2(fd: 3, vec: 10025821de0, vlen: 1, pos_l: 6253000, pos_h: 0, flags: 1) fio-4217 [009] .... 1304.635474: sys_preadv2 -> 0x1000 Signed-off-by: Rui Salvaterra Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 2 ++ arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 3fa9df70aa20..2fc5d4db503c 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -384,3 +384,5 @@ SYSCALL(ni_syscall) SYSCALL(ni_syscall) SYSCALL(mlock2) SYSCALL(copy_file_range) +COMPAT_SYS_SPU(preadv2) +COMPAT_SYS_SPU(pwritev2) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 1f2594d45605..cf12c580f6b2 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define NR_syscalls 380 +#define NR_syscalls 382 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 940290d45b08..e9f5f41aa55a 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -390,5 +390,7 @@ #define __NR_membarrier 365 #define __NR_mlock2 378 #define __NR_copy_file_range 379 +#define __NR_preadv2 380 +#define __NR_pwritev2 381 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3 From a8950e49bd241dc9ad90c24e92e23e95dbe9f018 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 19 Apr 2016 18:17:32 +0800 Subject: nios2: memset: use the right constraint modifier for the %4 output operand Depending on the size of the area to be memset'ed, the nios2 memset implementation either uses a naive loop (for buffers smaller or equal than 8 bytes) or a more optimized implementation (for buffers larger than 8 bytes). This implementation does 4-byte stores rather than 1-byte stores to speed up memset. However, we discovered that on our nios2 platform, memset() was not properly setting the buffer to the expected value. A memset of 0xff would not set the entire buffer to 0xff, but to: 0xff 0x00 0xff 0x00 0xff 0x00 0xff 0x00 ... Which is obviously incorrect. Our investigation has revealed that the problem lies in the incorrect constraints used in the inline assembly. The following piece of assembly, from the nios2 memset implementation, is supposed to create a 4-byte value that repeats 4 times the 1-byte pattern passed as memset argument: /* fill8 %3, %5 (c & 0xff) */ " slli %4, %5, 8\n" " or %4, %4, %5\n" " slli %3, %4, 16\n" " or %3, %3, %4\n" However, depending on the compiler and optimization level, this code might be compiled as: 34: 280a923a slli r5,r5,8 38: 294ab03a or r5,r5,r5 3c: 2808943a slli r4,r5,16 40: 2148b03a or r4,r4,r5 This is wrong because r5 gets used both for %5 and %4, which leads to the final pattern stored in r4 to be 0xff00ff00 rather than the expected 0xffffffff. %4 is defined with the "=r" constraint, i.e as an output operand. However, as explained in http://www.ethernut.de/en/documents/arm-inline-asm.html, this does not prevent gcc from using the same register for an output operand (%4) and input operand (%5). By using the constraint modifier '&', we indicate that the register should be used for output only. With this change, we get the following assembly output: 34: 2810923a slli r8,r5,8 38: 4150b03a or r8,r8,r5 3c: 400e943a slli r7,r8,16 40: 3a0eb03a or r7,r7,r8 Which correctly produces the 0xffffffff pattern when 0xff is passed as the memset() pattern. It is worth mentioning the observed consequence of this bug: we were hitting the kernel BUG() in mm/bootmem.c:__free() that verifies when marking a page as free that it was previously marked as occupied (i.e that the bit was set to 1). The entire bootmem bitmap is set to 0xff bit via a memset() during the bootmem initialization. The bootmem_free() call right after the initialization was finding some bits to be set to 0, which didn't make sense since the bitmap has just been memset'ed to 0xff. Except that due to the bug explained above, the bitmap was in fact initialized to 0xff00ff00. Thanks to Marek Vasut for his help and feedback. Signed-off-by: Romain Perier Acked-by: Marek Vasut Acked-by: Ley Foon Tan --- arch/nios2/lib/memset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/nios2/lib/memset.c b/arch/nios2/lib/memset.c index c2cfcb121e34..2fcefe720283 100644 --- a/arch/nios2/lib/memset.c +++ b/arch/nios2/lib/memset.c @@ -68,7 +68,7 @@ void *memset(void *s, int c, size_t count) "=r" (charcnt), /* %1 Output */ "=r" (dwordcnt), /* %2 Output */ "=r" (fill8reg), /* %3 Output */ - "=r" (wrkrega) /* %4 Output */ + "=&r" (wrkrega) /* %4 Output only */ : "r" (c), /* %5 Input */ "0" (s), /* %0 Input/Output */ "1" (count) /* %1 Input/Output */ -- cgit v1.2.3 From 32ed9a0e0ddcf58ef343bcb6ae44ee60d1ee31ce Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 26 Apr 2016 19:29:33 +0300 Subject: ARC: support generic per-device coherent dma mem Signed-off-by: Alexey Brodkin Cc: linux-kernel@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 12d0284a46e5..28d47f8eb934 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -37,6 +37,7 @@ config ARC select OF_EARLY_FLATTREE select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW + select HAVE_GENERIC_DMA_COHERENT config MIGHT_HAVE_PCI bool -- cgit v1.2.3 From 1b10cb21d888c021bedbe678f7c26aee1bf04ffa Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 26 Apr 2016 19:29:34 +0300 Subject: ARC: add support for reserved memory defined by device tree Enable reserved memory initialization from device tree. Signed-off-by: Alexey Brodkin Cc: Grant Likely Cc: Marek Szyprowski Cc: linux-kernel@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 1 + arch/arc/mm/init.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 28d47f8eb934..ec4791ea6911 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -35,6 +35,7 @@ config ARC select NO_BOOTMEM select OF select OF_EARLY_FLATTREE + select OF_RESERVED_MEM select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW select HAVE_GENERIC_DMA_COHERENT diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 7d2c4fbf4f22..5487d0b97400 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -13,6 +13,7 @@ #ifdef CONFIG_BLK_DEV_INITRD #include #endif +#include #include #include #include @@ -136,6 +137,9 @@ void __init setup_arch_memory(void) memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + memblock_dump_all(); /*----------------- node/zones setup --------------------------*/ -- cgit v1.2.3 From 49fa5230462f9f2c4e97c81356473a6bdf06c422 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Apr 2016 17:27:37 -0400 Subject: sparc64: Fix bootup regressions on some Kconfig combinations. The system call tracing bug fix mentioned in the Fixes tag below increased the amount of assembler code in the sequence of assembler files included by head_64.S This caused to total set of code to exceed 0x4000 bytes in size, which overflows the expression in head_64.S that works to place swapper_tsb at address 0x408000. When this is violated, the TSB is not properly aligned, and also the trap table is not aligned properly either. All of this together results in failed boots. So, do two things: 1) Simplify some code by using ba,a instead of ba/nop to get those bytes back. 2) Add a linker script assertion to make sure that if this happens again the build will fail. Fixes: 1a40b95374f6 ("sparc: Fix system call tracing register handling.") Reported-by: Meelis Roos Reported-by: Joerg Abraham Signed-off-by: David S. Miller --- arch/sparc/kernel/cherrs.S | 14 +++++--------- arch/sparc/kernel/fpu_traps.S | 11 +++++------ arch/sparc/kernel/head_64.S | 24 ++++++++---------------- arch/sparc/kernel/misctrap.S | 12 ++++-------- arch/sparc/kernel/spiterrs.S | 18 ++++++------------ arch/sparc/kernel/utrap.S | 3 +-- arch/sparc/kernel/vmlinux.lds.S | 4 ++++ arch/sparc/kernel/winfixup.S | 3 +-- 8 files changed, 34 insertions(+), 55 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/cherrs.S b/arch/sparc/kernel/cherrs.S index 4ee1ad420862..655628def68e 100644 --- a/arch/sparc/kernel/cherrs.S +++ b/arch/sparc/kernel/cherrs.S @@ -214,8 +214,7 @@ do_dcpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ subcc %g1, %g2, %g1 ! Next cacheline bge,pt %icc, 1b nop - ba,pt %xcc, dcpe_icpe_tl1_common - nop + ba,a,pt %xcc, dcpe_icpe_tl1_common do_dcpe_tl1_fatal: sethi %hi(1f), %g7 @@ -224,8 +223,7 @@ do_dcpe_tl1_fatal: mov 0x2, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_dcpe_tl1,.-do_dcpe_tl1 .globl do_icpe_tl1 @@ -259,8 +257,7 @@ do_icpe_tl1_nonfatal: /* Ok we may use interrupt globals safely. */ subcc %g1, %g2, %g1 bge,pt %icc, 1b nop - ba,pt %xcc, dcpe_icpe_tl1_common - nop + ba,a,pt %xcc, dcpe_icpe_tl1_common do_icpe_tl1_fatal: sethi %hi(1f), %g7 @@ -269,8 +266,7 @@ do_icpe_tl1_fatal: mov 0x3, %o0 call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_icpe_tl1,.-do_icpe_tl1 .type dcpe_icpe_tl1_common,#function @@ -456,7 +452,7 @@ __cheetah_log_error: cmp %g2, 0x63 be c_cee nop - ba,pt %xcc, c_deferred + ba,a,pt %xcc, c_deferred .size __cheetah_log_error,.-__cheetah_log_error /* Cheetah FECC trap handling, we get here from tl{0,1}_fecc diff --git a/arch/sparc/kernel/fpu_traps.S b/arch/sparc/kernel/fpu_traps.S index a6864826a4bd..336d2750fe78 100644 --- a/arch/sparc/kernel/fpu_traps.S +++ b/arch/sparc/kernel/fpu_traps.S @@ -100,8 +100,8 @@ do_fpdis: fmuld %f0, %f2, %f26 faddd %f0, %f2, %f28 fmuld %f0, %f2, %f30 - b,pt %xcc, fpdis_exit - nop + ba,a,pt %xcc, fpdis_exit + 2: andcc %g5, FPRS_DU, %g0 bne,pt %icc, 3f fzero %f32 @@ -144,8 +144,8 @@ do_fpdis: fmuld %f32, %f34, %f58 faddd %f32, %f34, %f60 fmuld %f32, %f34, %f62 - ba,pt %xcc, fpdis_exit - nop + ba,a,pt %xcc, fpdis_exit + 3: mov SECONDARY_CONTEXT, %g3 add %g6, TI_FPREGS, %g1 @@ -197,8 +197,7 @@ fpdis_exit2: fp_other_bounce: call do_fpother add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size fp_other_bounce,.-fp_other_bounce .align 32 diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 5b4f5c363674..a076b4249e62 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -466,9 +466,8 @@ sun4v_chip_type: subcc %g3, 1, %g3 bne,pt %xcc, 41b add %g1, 1, %g1 - mov SUN4V_CHIP_SPARC64X, %g4 ba,pt %xcc, 5f - nop + mov SUN4V_CHIP_SPARC64X, %g4 49: mov SUN4V_CHIP_UNKNOWN, %g4 @@ -553,8 +552,7 @@ sun4u_init: stxa %g0, [%g7] ASI_DMMU membar #Sync - ba,pt %xcc, sun4u_continue - nop + ba,a,pt %xcc, sun4u_continue sun4v_init: /* Set ctx 0 */ @@ -565,14 +563,12 @@ sun4v_init: mov SECONDARY_CONTEXT, %g7 stxa %g0, [%g7] ASI_MMU membar #Sync - ba,pt %xcc, niagara_tlb_fixup - nop + ba,a,pt %xcc, niagara_tlb_fixup sun4u_continue: BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup) - ba,pt %xcc, spitfire_tlb_fixup - nop + ba,a,pt %xcc, spitfire_tlb_fixup niagara_tlb_fixup: mov 3, %g2 /* Set TLB type to hypervisor. */ @@ -647,8 +643,7 @@ niagara_patch: call hypervisor_patch_cachetlbops nop - ba,pt %xcc, tlb_fixup_done - nop + ba,a,pt %xcc, tlb_fixup_done cheetah_tlb_fixup: mov 2, %g2 /* Set TLB type to cheetah+. */ @@ -667,8 +662,7 @@ cheetah_tlb_fixup: call cheetah_patch_cachetlbops nop - ba,pt %xcc, tlb_fixup_done - nop + ba,a,pt %xcc, tlb_fixup_done spitfire_tlb_fixup: /* Set TLB type to spitfire. */ @@ -782,8 +776,7 @@ setup_trap_table: call %o1 add %sp, (2047 + 128), %o0 - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f 1: sethi %hi(sparc64_ttable_tl0), %o0 set prom_set_trap_table_name, %g2 @@ -822,8 +815,7 @@ setup_trap_table: BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f /* Disable STICK_INT interrupts. */ 1: diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 753b4f031bfb..34b4933900bf 100644 --- a/arch/sparc/kernel/misctrap.S +++ b/arch/sparc/kernel/misctrap.S @@ -18,8 +18,7 @@ __do_privact: 109: or %g7, %lo(109b), %g7 call do_privact add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __do_privact,.-__do_privact .type do_mna,#function @@ -46,8 +45,7 @@ do_mna: mov %l5, %o2 call mem_address_unaligned add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_mna,.-do_mna .type do_lddfmna,#function @@ -65,8 +63,7 @@ do_lddfmna: mov %l5, %o2 call handle_lddfmna add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_lddfmna,.-do_lddfmna .type do_stdfmna,#function @@ -84,8 +81,7 @@ do_stdfmna: mov %l5, %o2 call handle_stdfmna add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S index c357e40ffd01..4a73009f66a5 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -85,8 +85,7 @@ __spitfire_cee_trap_continue: ba,pt %xcc, etraptl1 rd %pc, %g7 - ba,pt %xcc, 2f - nop + ba,a,pt %xcc, 2f 1: ba,pt %xcc, etrap_irq rd %pc, %g7 @@ -100,8 +99,7 @@ __spitfire_cee_trap_continue: mov %l5, %o2 call spitfire_access_error add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_access_error,.-__spitfire_access_error /* This is the trap handler entry point for ECC correctable @@ -179,8 +177,7 @@ __spitfire_data_access_exception_tl1: mov %l5, %o2 call spitfire_data_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_data_access_exception_tl1,.-__spitfire_data_access_exception_tl1 .type __spitfire_data_access_exception,#function @@ -200,8 +197,7 @@ __spitfire_data_access_exception: mov %l5, %o2 call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_data_access_exception,.-__spitfire_data_access_exception .type __spitfire_insn_access_exception_tl1,#function @@ -220,8 +216,7 @@ __spitfire_insn_access_exception_tl1: mov %l5, %o2 call spitfire_insn_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_insn_access_exception_tl1,.-__spitfire_insn_access_exception_tl1 .type __spitfire_insn_access_exception,#function @@ -240,6 +235,5 @@ __spitfire_insn_access_exception: mov %l5, %o2 call spitfire_insn_access_exception add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap .size __spitfire_insn_access_exception,.-__spitfire_insn_access_exception diff --git a/arch/sparc/kernel/utrap.S b/arch/sparc/kernel/utrap.S index b7f0f3f3a909..c731e8023d3e 100644 --- a/arch/sparc/kernel/utrap.S +++ b/arch/sparc/kernel/utrap.S @@ -11,8 +11,7 @@ utrap_trap: /* %g3=handler,%g4=level */ mov %l4, %o1 call bad_trap add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap invoke_utrap: sllx %g3, 3, %g3 diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index aadd321aa05d..7d02b1fef025 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -33,6 +33,10 @@ ENTRY(_start) jiffies = jiffies_64; #endif +#ifdef CONFIG_SPARC64 +ASSERT((swapper_tsb == 0x0000000000408000), "Error: sparc64 early assembler too large") +#endif + SECTIONS { #ifdef CONFIG_SPARC64 diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S index 1e67ce958369..855019a8590e 100644 --- a/arch/sparc/kernel/winfixup.S +++ b/arch/sparc/kernel/winfixup.S @@ -32,8 +32,7 @@ fill_fixup: rd %pc, %g7 call do_sparc64_fault add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - nop + ba,a,pt %xcc, rtrap /* Be very careful about usage of the trap globals here. * You cannot touch %g5 as that has the fault information. -- cgit v1.2.3 From 1bdb8970392a68489b469c3a330a1adb5ef61beb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Apr 2016 14:22:32 -0600 Subject: x86/apic: Handle zero vector gracefully in clear_vector_irq() If x86_vector_alloc_irq() fails x86_vector_free_irqs() is invoked to cleanup the already allocated vectors. This subsequently calls clear_vector_irq(). The failed irq has no vector assigned, which triggers the BUG_ON(!vector) in clear_vector_irq(). We cannot suppress the call to x86_vector_free_irqs() for the failed interrupt, because the other data related to this irq must be cleaned up as well. So calling clear_vector_irq() with vector == 0 is legitimate. Remove the BUG_ON and return if vector is zero, [ tglx: Massaged changelog ] Fixes: b5dc8e6c21e7 "x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors" Signed-off-by: Keith Busch Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/vector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ad59d70bcb1a..ef495511f019 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -256,7 +256,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) struct irq_desc *desc; int cpu, vector; - BUG_ON(!data->cfg.vector); + if (!data->cfg.vector) + return; vector = data->cfg.vector; for_each_cpu_and(cpu, data->domain, cpu_online_mask) -- cgit v1.2.3 From 0a25556f84d5f79e68e9502bb1f32a43377ab2bf Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Wed, 27 Apr 2016 11:35:31 +0200 Subject: perf/x86/amd: Set the size of event map array to PERF_COUNT_HW_MAX The entry for PERF_COUNT_HW_REF_CPU_CYCLES is not used on AMD, but is referenced by filter_events() which expects undefined events to have a value of 0. Found via KASAN: UBSAN: Undefined behaviour in arch/x86/events/amd/core.c:132:30 index 9 is out of range for type 'u64 [9]' UBSAN: Undefined behaviour in arch/x86/events/amd/core.c:132:9 load of address ffffffff81c021c8 with insufficient space for an object of type 'const u64' Signed-off-by: Adam Borowski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1461749731-30979-1-git-send-email-kilobyte@angband.pl Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 86a9bec18dab..bd3e8421b57c 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids /* * AMD Performance Monitor K7 and later. */ -static const u64 amd_perfmon_event_map[] = +static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, -- cgit v1.2.3 From 1c5ac21a0e9bab7fc45d0ba9e11623e9ad99d02e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 29 Mar 2016 17:43:10 +0300 Subject: perf/x86/intel/pt: Don't die on VMXON Some versions of Intel PT do not support tracing across VMXON, more specifically, VMXON will clear TraceEn control bit and any attempt to set it before VMXOFF will throw a #GP, which in the current state of things will crash the kernel. Namely: $ perf record -e intel_pt// kvm -nographic on such a machine will kill it. To avoid this, notify the intel_pt driver before VMXON and after VMXOFF so that it knows when not to enable itself. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Gleb Natapov Cc: Jiri Olsa Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/87oa9dwrfk.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 75 +++++++++++++++++++++++++++++++++------ arch/x86/events/intel/pt.h | 3 ++ arch/x86/include/asm/perf_event.h | 4 +++ arch/x86/kvm/vmx.c | 4 +++ 4 files changed, 75 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 6af7cf71d6b2..09a77dbc73c9 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void) struct dev_ext_attribute *de_attrs; struct attribute **attrs; size_t size; + u64 reg; int ret; long i; + if (boot_cpu_has(X86_FEATURE_VMX)) { + /* + * Intel SDM, 36.5 "Tracing post-VMXON" says that + * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace + * post-VMXON. + */ + rdmsrl(MSR_IA32_VMX_MISC, reg); + if (reg & BIT(14)) + pt_pmu.vmx = true; + } + attrs = NULL; for (i = 0; i < PT_CPUID_LEAVES; i++) { @@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); + event->hw.config = reg; wrmsrl(MSR_IA32_RTIT_CTL, reg); } -static void pt_config_start(bool start) +static void pt_config_stop(struct perf_event *event) { - u64 ctl; + u64 ctl = READ_ONCE(event->hw.config); + + /* may be already stopped by a PMI */ + if (!(ctl & RTIT_CTL_TRACEEN)) + return; - rdmsrl(MSR_IA32_RTIT_CTL, ctl); - if (start) - ctl |= RTIT_CTL_TRACEEN; - else - ctl &= ~RTIT_CTL_TRACEEN; + ctl &= ~RTIT_CTL_TRACEEN; wrmsrl(MSR_IA32_RTIT_CTL, ctl); + WRITE_ONCE(event->hw.config, ctl); + /* * A wrmsr that disables trace generation serializes other PT * registers and causes all data packets to be written to memory, @@ -291,8 +306,7 @@ static void pt_config_start(bool start) * The below WMB, separating data store and aux_head store matches * the consumer's RMB that separates aux_head load and data load. */ - if (!start) - wmb(); + wmb(); } static void pt_config_buffer(void *buf, unsigned int topa_idx, @@ -942,11 +956,17 @@ void intel_pt_interrupt(void) if (!ACCESS_ONCE(pt->handle_nmi)) return; - pt_config_start(false); + /* + * If VMX is on and PT does not support it, don't touch anything. + */ + if (READ_ONCE(pt->vmx_on)) + return; if (!event) return; + pt_config_stop(event); + buf = perf_get_aux(&pt->handle); if (!buf) return; @@ -983,6 +1003,35 @@ void intel_pt_interrupt(void) } } +void intel_pt_handle_vmx(int on) +{ + struct pt *pt = this_cpu_ptr(&pt_ctx); + struct perf_event *event; + unsigned long flags; + + /* PT plays nice with VMX, do nothing */ + if (pt_pmu.vmx) + return; + + /* + * VMXON will clear RTIT_CTL.TraceEn; we need to make + * sure to not try to set it while VMX is on. Disable + * interrupts to avoid racing with pmu callbacks; + * concurrent PMI should be handled fine. + */ + local_irq_save(flags); + WRITE_ONCE(pt->vmx_on, on); + + if (on) { + /* prevent pt_config_stop() from writing RTIT_CTL */ + event = pt->handle.event; + if (event) + event->hw.config = 0; + } + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(intel_pt_handle_vmx); + /* * PMU callbacks */ @@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf = perf_get_aux(&pt->handle); + if (READ_ONCE(pt->vmx_on)) + return; + if (!buf || pt_buffer_is_full(buf, pt)) { event->hw.state = PERF_HES_STOPPED; return; @@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode) * see comment in intel_pt_interrupt(). */ ACCESS_ONCE(pt->handle_nmi) = 0; - pt_config_start(false); + + pt_config_stop(event); if (event->hw.state == PERF_HES_STOPPED) return; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 336878a5d205..3abb5f5cccc8 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -65,6 +65,7 @@ enum pt_capabilities { struct pt_pmu { struct pmu pmu; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; + bool vmx; }; /** @@ -107,10 +108,12 @@ struct pt_buffer { * struct pt - per-cpu pt context * @handle: perf output handle * @handle_nmi: do handle PT PMI on this cpu, there's an active event + * @vmx_on: 1 if VMX is ON on this cpu */ struct pt { struct perf_output_handle handle; int handle_nmi; + int vmx_on; }; #endif /* __INTEL_PT_H__ */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 5a2ed3ed2f26..f353061bba1d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { } static inline void perf_check_microcode(void) { } #endif +#ifdef CONFIG_CPU_SUP_INTEL + extern void intel_pt_handle_vmx(int on); +#endif + #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) extern void amd_pmu_enable_virt(void); extern void amd_pmu_disable_virt(void); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ee1c8a93871c..133679d520af 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void) static void kvm_cpu_vmxon(u64 addr) { + intel_pt_handle_vmx(1); + asm volatile (ASM_VMX_VMXON_RAX : : "a"(&addr), "m"(addr) : "memory", "cc"); @@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void) static void kvm_cpu_vmxoff(void) { asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); + + intel_pt_handle_vmx(0); } static void hardware_disable(void) -- cgit v1.2.3 From cf3beb7c90a8efa16a06b26634cddddc92bb819c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 21 Apr 2016 02:30:10 -0700 Subject: perf/x86/intel: Fix incorrect lbr_sel_mask value This patch fixes a bug which was introduced by: b16a5b52eb90 ("perf/x86: Add option to disable reading branch flags/cycles") In this patch, lbr_sel_mask is used to mask the lbr_select. But LBR_SEL_MASK doesn't include the bit for LBR_CALL_STACK. So LBR call stack will never be set in lbr_select. This patch corrects the LBR_SEL_MASK by including all valid bits in LBR_SELECT. Also, the LBR_CALL_STACK bit is different as other bit in LBR_SELECT. It does not operate in suppress mode, so it needs to be specially handled in intel_pmu_setup_hw_lbr_filter. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1461231010-4399-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6c3b7c1780c9..1ca5d1e7d4f2 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -63,7 +63,7 @@ static enum { #define LBR_PLM (LBR_KERNEL | LBR_USER) -#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ +#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ #define LBR_NOT_SUPP -1 /* LBR filter not supported */ #define LBR_IGN 0 /* ignored */ @@ -610,8 +610,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate * in suppress mode. So LBR_SELECT should be set to * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) + * But the 10th bit LBR_CALL_STACK does not operate + * in suppress mode. */ - reg->config = mask ^ x86_pmu.lbr_sel_mask; + reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && -- cgit v1.2.3