10 files changed, 1001 insertions, 132 deletions
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index d3a0f0fd56dd..50137bc9e150 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci.o
 obj-$(CONFIG_MODULES) += xtensa_ksyms.o module.o
 obj-$(CONFIG_FUNCTION_TRACER) += mcount.o
 obj-$(CONFIG_SMP) += smp.o mxhead.o
+obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o
 
 AFLAGS_head.o += -mtext-section-literals
 
@@ -27,10 +28,11 @@ AFLAGS_head.o += -mtext-section-literals
 #
 # Replicate rules in scripts/Makefile.build
 
-sed-y = -e 's/\*(\(\.[a-z]*it\|\.ref\|\)\.text)/*(\1.literal \1.text)/g' \
-	-e 's/\.text\.unlikely/.literal.unlikely .text.unlikely/g'	 \
-	-e 's/\*(\(\.text .*\))/*(.literal \1)/g'			 \
-	-e 's/\*(\(\.text\.[a-z]*\))/*(\1.literal \1)/g'
+sed-y = -e ':a; s/\*(\([^)]*\)\.text\.unlikely/*(\1.literal.unlikely .{text}.unlikely/; ta; ' \
+	-e ':b; s/\*(\([^)]*\)\.text\(\.[a-z]*\)/*(\1.{text}\2.literal .{text}\2/; tb; ' \
+	-e ':c; s/\*(\([^)]*\)\(\.[a-z]*it\|\.ref\)\.text/*(\1\2.literal \2.{text}/; tc; ' \
+	-e ':d; s/\*(\([^)]\+ \|\)\.text/*(\1.literal .{text}/; td; ' \
+	-e 's/\.{text}/.text/g'
 
 quiet_cmd__cpp_lds_S = LDS     $@
 cmd__cpp_lds_S = $(CPP) $(cpp_flags) -P -C -Uxtensa -D__ASSEMBLY__ $<    \
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index 82bbfa5a05b3..504130357597 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/entry.S
- *
  * Low-level exception handling
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -8,6 +6,7 @@
  * for more details.
  *
  * Copyright (C) 2004 - 2008 by Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Chris Zankel <chris@zankel.net>
  *
@@ -75,6 +74,27 @@
 #endif
 	.endm
 
+
+	.macro	irq_save flags tmp
+#if XTENSA_FAKE_NMI
+#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL
+	rsr	\flags, ps
+	extui	\tmp, \flags, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	\tmp, LOCKLEVEL, 99f
+	rsil	\tmp, LOCKLEVEL
+99:
+#else
+	movi	\tmp, LOCKLEVEL
+	rsr	\flags, ps
+	or	\flags, \flags, \tmp
+	xsr	\flags, ps
+	rsync
+#endif
+#else
+	rsil	\flags, LOCKLEVEL
+#endif
+	.endm
+
 /* ----------------- DEFAULT FIRST LEVEL EXCEPTION HANDLERS ----------------- */
 
 /*
@@ -122,6 +142,7 @@ _user_exception:
 	/* Save SAR and turn off single stepping */
 
 	movi	a2, 0
+	wsr	a2, depc		# terminate user stack trace with 0
 	rsr	a3, sar
 	xsr	a2, icountlevel
 	s32i	a3, a1, PT_SAR
@@ -301,7 +322,18 @@ _kernel_exception:
 	s32i	a14, a1, PT_AREG14
 	s32i	a15, a1, PT_AREG15
 
+	_bnei	a2, 1, 1f
+
+	/* Copy spill slots of a0 and a1 to imitate movsp
+	 * in order to keep exception stack continuous
+	 */
+	l32i	a3, a1, PT_SIZE
+	l32i	a0, a1, PT_SIZE + 4
+	s32e	a3, a1, -16
+	s32e	a0, a1, -12
 1:
+	l32i	a0, a1, PT_AREG0	# restore saved a0
+	wsr	a0, depc
 
 #ifdef KERNEL_STACK_OVERFLOW_CHECK
 
@@ -340,75 +372,88 @@ common_exception:
 
 	/* It is now save to restore the EXC_TABLE_FIXUP variable. */
 
-	rsr	a0, exccause
+	rsr	a2, exccause
 	movi	a3, 0
-	rsr	a2, excsave1
-	s32i	a0, a1, PT_EXCCAUSE
-	s32i	a3, a2, EXC_TABLE_FIXUP
-
-	/* All unrecoverable states are saved on stack, now, and a1 is valid,
-	 * so we can allow exceptions and interrupts (*) again.
-	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
+	rsr	a0, excsave1
+	s32i	a2, a1, PT_EXCCAUSE
+	s32i	a3, a0, EXC_TABLE_FIXUP
+
+	/* All unrecoverable states are saved on stack, now, and a1 is valid.
+	 * Now we can allow exceptions again. In case we've got an interrupt
+	 * PS.INTLEVEL is set to LOCKLEVEL disabling furhter interrupts,
+	 * otherwise it's left unchanged.
 	 *
-	 * (*) We only allow interrupts if they were previously enabled and
-	 *     we're not handling an IRQ
+	 * Set PS(EXCM = 0, UM = 0, RING = 0, OWB = 0, WOE = 1, INTLEVEL = X)
 	 */
 
 	rsr	a3, ps
-	addi	a0, a0, -EXCCAUSE_LEVEL1_INTERRUPT
-	movi	a2, LOCKLEVEL
+	s32i	a3, a1, PT_PS		# save ps
+
+#if XTENSA_FAKE_NMI
+	/* Correct PS needs to be saved in the PT_PS:
+	 * - in case of exception or level-1 interrupt it's in the PS,
+	 *   and is already saved.
+	 * - in case of medium level interrupt it's in the excsave2.
+	 */
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	beq	a2, a0, .Lmedium_level_irq
+	bnei	a2, EXCCAUSE_LEVEL1_INTERRUPT, .Lexception
+	beqz	a3, .Llevel1_irq	# level-1 IRQ sets ps.intlevel to 0
+
+.Lmedium_level_irq:
+	rsr	a0, excsave2
+	s32i	a0, a1, PT_PS		# save medium-level interrupt ps
+	bgei	a3, LOCKLEVEL, .Lexception
+
+.Llevel1_irq:
+	movi	a3, LOCKLEVEL
+
+.Lexception:
+	movi	a0, 1 << PS_WOE_BIT
+	or	a3, a3, a0
+#else
+	addi	a2, a2, -EXCCAUSE_LEVEL1_INTERRUPT
+	movi	a0, LOCKLEVEL
 	extui	a3, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
 					# a3 = PS.INTLEVEL
-	moveqz	a3, a2, a0		# a3 = LOCKLEVEL iff interrupt
+	moveqz	a3, a0, a2		# a3 = LOCKLEVEL iff interrupt
 	movi	a2, 1 << PS_WOE_BIT
 	or	a3, a3, a2
-	rsr	a0, exccause
-	xsr	a3, ps
+	rsr	a2, exccause
+#endif
 
-	s32i	a3, a1, PT_PS		# save ps
+	/* restore return address (or 0 if return to userspace) */
+	rsr	a0, depc
+	wsr	a3, ps
+	rsync				# PS.WOE => rsync => overflow
 
 	/* Save lbeg, lend */
 
-	rsr	a2, lbeg
+	rsr	a4, lbeg
 	rsr	a3, lend
-	s32i	a2, a1, PT_LBEG
+	s32i	a4, a1, PT_LBEG
 	s32i	a3, a1, PT_LEND
 
 	/* Save SCOMPARE1 */
 
 #if XCHAL_HAVE_S32C1I
-	rsr     a2, scompare1
-	s32i    a2, a1, PT_SCOMPARE1
+	rsr     a3, scompare1
+	s32i    a3, a1, PT_SCOMPARE1
 #endif
 
 	/* Save optional registers. */
 
-	save_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
+	save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT
 	
-#ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we've just disabled them.
-	 */
-	movi	a4, trace_hardirqs_off
-	callx4	a4
-1:
-#endif
-
 	/* Go to second-level dispatcher. Set up parameters to pass to the
 	 * exception handler and call the exception handler.
 	 */
 
 	rsr	a4, excsave1
 	mov	a6, a1			# pass stack frame
-	mov	a7, a0			# pass EXCCAUSE
-	addx4	a4, a0, a4
+	mov	a7, a2			# pass EXCCAUSE
+	addx4	a4, a2, a4
 	l32i	a4, a4, EXC_TABLE_DEFAULT		# load handler
 
 	/* Call the second-level handler */
@@ -419,8 +464,17 @@ common_exception:
 	.global common_exception_return
 common_exception_return:
 
+#if XTENSA_FAKE_NMI
+	l32i	a2, a1, PT_EXCCAUSE
+	movi	a3, EXCCAUSE_MAPPED_NMI
+	beq	a2, a3, .LNMIexit
+#endif
 1:
-	rsil	a2, LOCKLEVEL
+	irq_save a2, a3
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_off
+	callx4	a4
+#endif
 
 	/* Jump if we are returning from kernel exceptions. */
 
@@ -445,6 +499,10 @@ common_exception_return:
 
 	/* Call do_signal() */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, do_notify_resume	# int do_notify_resume(struct pt_regs*)
 	mov	a6, a1
@@ -453,6 +511,10 @@ common_exception_return:
 
 3:	/* Reschedule */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	movi	a4, trace_hardirqs_on
+	callx4	a4
+#endif
 	rsil	a2, 0
 	movi	a4, schedule	# void schedule (void)
 	callx4	a4
@@ -471,6 +533,12 @@ common_exception_return:
 	j	1b
 #endif
 
+#if XTENSA_FAKE_NMI
+.LNMIexit:
+	l32i	a3, a1, PT_PS
+	_bbci.l	a3, PS_UM_BIT, 4f
+#endif
+
 5:
 #ifdef CONFIG_DEBUG_TLB_SANITY
 	l32i	a4, a1, PT_DEPC
@@ -481,16 +549,8 @@ common_exception_return:
 6:
 4:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	l32i	a4, a1, PT_DEPC
-	/* Double exception means we came here with an exception
-	 * while PS.EXCM was set, i.e. interrupts disabled.
-	 */
-	bgeui	a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-	l32i	a4, a1, PT_EXCCAUSE
-	bnei	a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-	/* We came here with an interrupt means interrupts were enabled
-	 * and we'll reenable them on return.
-	 */
+	extui	a4, a3, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH
+	bgei	a4, LOCKLEVEL, 1f
 	movi	a4, trace_hardirqs_on
 	callx4	a4
 1:
@@ -568,12 +628,13 @@ user_exception_exit:
 	 *	 (if we have restored WSBITS-1 frames).
 	 */
 
+2:
 #if XCHAL_HAVE_THREADPTR
 	l32i	a3, a1, PT_THREADPTR
 	wur	a3, threadptr
 #endif
 
-2:	j	common_exception_exit
+	j	common_exception_exit
 
 	/* This is the kernel exception exit.
 	 * We avoided to do a MOVSP when we entered the exception, but we
@@ -1561,6 +1622,13 @@ ENTRY(fast_second_level_miss)
 	rfde
 
 9:	l32i	a0, a1, TASK_ACTIVE_MM	# unlikely case mm == 0
+	bnez	a0, 8b
+
+	/* Even more unlikely case active_mm == 0.
+	 * We can get here with NMI in the middle of context_switch that
+	 * touches vmalloc area.
+	 */
+	movi	a0, init_mm
 	j	8b
 
 #if (DCACHE_WAY_SIZE > PAGE_SIZE)
@@ -1820,7 +1888,7 @@ ENDPROC(system_call)
 	mov	a12, a0
 	.endr
 #endif
-	_entry	a1, 48
+	_entry	a1, 16
 #if XCHAL_NUM_AREGS % 12 == 0
 	mov	a8, a8
 #elif XCHAL_NUM_AREGS % 12 == 4
@@ -1844,7 +1912,7 @@ ENDPROC(system_call)
 
 ENTRY(_switch_to)
 
-	entry	a1, 16
+	entry	a1, 48
 
 	mov	a11, a3			# and 'next' (a3)
 
@@ -1864,10 +1932,8 @@ ENTRY(_switch_to)
 
 	/* Disable ints while we manipulate the stack pointer. */
 
-	rsil	a14, LOCKLEVEL
-	rsr	a3, excsave1
+	irq_save a14, a3
 	rsync
-	s32i	a3, a3, EXC_TABLE_FIXUP	/* enter critical section */
 
 	/* Switch CPENABLE */
 
@@ -1888,9 +1954,7 @@ ENTRY(_switch_to)
 	 */
 
 	rsr	a3, excsave1		# exc_table
-	movi	a6, 0
 	addi	a7, a5, PT_REGS_OFFSET
-	s32i	a6, a3, EXC_TABLE_FIXUP
 	s32i	a7, a3, EXC_TABLE_KSTK
 
 	/* restore context of the task 'next' */
diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c
index d7b5a4c8ae5d..4ac3d23161cf 100644
--- a/arch/xtensa/kernel/irq.c
+++ b/arch/xtensa/kernel/irq.c
@@ -28,7 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/platform.h>
 
-atomic_t irq_err_count;
+DECLARE_PER_CPU(unsigned long, nmi_count);
 
 asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 {
@@ -57,11 +57,16 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs)
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
+	unsigned cpu __maybe_unused;
 #ifdef CONFIG_SMP
 	show_ipi_list(p, prec);
 #endif
-	seq_printf(p, "%*s: ", prec, "ERR");
-	seq_printf(p, "%10u\n", atomic_read(&irq_err_count));
+#if XTENSA_FAKE_NMI
+	seq_printf(p, "%*s:", prec, "NMI");
+	for_each_online_cpu(cpu)
+		seq_printf(p, " %10lu", per_cpu(nmi_count, cpu));
+	seq_puts(p, "   Non-maskable interrupts\n");
+#endif
 	return 0;
 }
 
@@ -106,6 +111,12 @@ int xtensa_irq_map(struct irq_domain *d, unsigned int irq,
 		irq_set_chip_and_handler_name(irq, irq_chip,
 				handle_percpu_irq, "timer");
 		irq_clear_status_flags(irq, IRQ_LEVEL);
+#ifdef XCHAL_INTTYPE_MASK_PROFILING
+	} else if (mask & XCHAL_INTTYPE_MASK_PROFILING) {
+		irq_set_chip_and_handler_name(irq, irq_chip,
+				handle_percpu_irq, "profiling");
+		irq_set_status_flags(irq, IRQ_LEVEL);
+#endif
 	} else {/* XCHAL_INTTYPE_MASK_WRITE_ERROR */
 		/* XCHAL_INTTYPE_MASK_NMI */
 		irq_set_chip_and_handler_name(irq, irq_chip,
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index e8b76b8e4b29..fb75ebf1463a 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -1,6 +1,4 @@
 /*
- * arch/xtensa/kernel/pci-dma.c
- *
  * DMA coherent memory allocation.
  *
  * This program is free software; you can redistribute  it and/or modify it
@@ -9,6 +7,7 @@
  * option) any later version.
  *
  * Copyright (C) 2002 - 2005 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  *
  * Based on version for i386.
  *
@@ -25,13 +24,107 @@
 #include <asm/io.h>
 #include <asm/cacheflush.h>
 
+void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		    enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		__flush_invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_FROM_DEVICE:
+		__invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_TO_DEVICE:
+		__flush_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+	}
+}
+EXPORT_SYMBOL(dma_cache_sync);
+
+static void xtensa_sync_single_for_cpu(struct device *dev,
+				       dma_addr_t dma_handle, size_t size,
+				       enum dma_data_direction dir)
+{
+	void *vaddr;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+	case DMA_FROM_DEVICE:
+		vaddr = bus_to_virt(dma_handle);
+		__invalidate_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void xtensa_sync_single_for_device(struct device *dev,
+					  dma_addr_t dma_handle, size_t size,
+					  enum dma_data_direction dir)
+{
+	void *vaddr;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+	case DMA_TO_DEVICE:
+		vaddr = bus_to_virt(dma_handle);
+		__flush_dcache_range((unsigned long)vaddr, size);
+		break;
+
+	case DMA_NONE:
+		BUG();
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void xtensa_sync_sg_for_cpu(struct device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_sync_single_for_cpu(dev, sg_dma_address(s),
+					   sg_dma_len(s), dir);
+	}
+}
+
+static void xtensa_sync_sg_for_device(struct device *dev,
+				      struct scatterlist *sg, int nents,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_sync_single_for_device(dev, sg_dma_address(s),
+					      sg_dma_len(s), dir);
+	}
+}
+
 /*
  * Note: We assume that the full memory space is always mapped to 'kseg'
  *	 Otherwise we have to use page attributes (not implemented).
  */
 
-void *
-dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
+static void *xtensa_dma_alloc(struct device *dev, size_t size,
+			      dma_addr_t *handle, gfp_t flag,
+			      struct dma_attrs *attrs)
 {
 	unsigned long ret;
 	unsigned long uncached = 0;
@@ -52,20 +145,15 @@ dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag)
 	BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR ||
 	       ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1);
 
+	uncached = ret + XCHAL_KSEG_BYPASS_VADDR - XCHAL_KSEG_CACHED_VADDR;
+	*handle = virt_to_bus((void *)ret);
+	__invalidate_dcache_range(ret, size);
 
-	if (ret != 0) {
-		memset((void*) ret, 0, size);
-		uncached = ret+XCHAL_KSEG_BYPASS_VADDR-XCHAL_KSEG_CACHED_VADDR;
-		*handle = virt_to_bus((void*)ret);
-		__flush_invalidate_dcache_range(ret, size);
-	}
-
-	return (void*)uncached;
+	return (void *)uncached;
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *hwdev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
+static void xtensa_dma_free(struct device *hwdev, size_t size, void *vaddr,
+			    dma_addr_t dma_handle, struct dma_attrs *attrs)
 {
 	unsigned long addr = (unsigned long)vaddr +
 		XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR;
@@ -75,24 +163,79 @@ void dma_free_coherent(struct device *hwdev, size_t size,
 
 	free_pages(addr, get_order(size));
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
+static dma_addr_t xtensa_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t size,
+				  enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	dma_addr_t dma_handle = page_to_phys(page) + offset;
+
+	BUG_ON(PageHighMem(page));
+	xtensa_sync_single_for_device(dev, dma_handle, size, dir);
+	return dma_handle;
+}
 
-void consistent_sync(void *vaddr, size_t size, int direction)
+static void xtensa_unmap_page(struct device *dev, dma_addr_t dma_handle,
+			      size_t size, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
 {
-	switch (direction) {
-	case PCI_DMA_NONE:
-		BUG();
-	case PCI_DMA_FROMDEVICE:        /* invalidate only */
-		__invalidate_dcache_range((unsigned long)vaddr,
-				          (unsigned long)size);
-		break;
+	xtensa_sync_single_for_cpu(dev, dma_handle, size, dir);
+}
 
-	case PCI_DMA_TODEVICE:          /* writeback only */
-	case PCI_DMA_BIDIRECTIONAL:     /* writeback and invalidate */
-		__flush_invalidate_dcache_range((unsigned long)vaddr,
-				    		(unsigned long)size);
-		break;
+static int xtensa_map_sg(struct device *dev, struct scatterlist *sg,
+			 int nents, enum dma_data_direction dir,
+			 struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = xtensa_map_page(dev, sg_page(s), s->offset,
+						 s->length, dir, attrs);
+	}
+	return nents;
+}
+
+static void xtensa_unmap_sg(struct device *dev,
+			    struct scatterlist *sg, int nents,
+			    enum dma_data_direction dir,
+			    struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		xtensa_unmap_page(dev, sg_dma_address(s),
+				  sg_dma_len(s), dir, attrs);
 	}
 }
-EXPORT_SYMBOL(consistent_sync);
+
+int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return 0;
+}
+
+struct dma_map_ops xtensa_dma_map_ops = {
+	.alloc = xtensa_dma_alloc,
+	.free = xtensa_dma_free,
+	.map_page = xtensa_map_page,
+	.unmap_page = xtensa_unmap_page,
+	.map_sg = xtensa_map_sg,
+	.unmap_sg = xtensa_unmap_sg,
+	.sync_single_for_cpu = xtensa_sync_single_for_cpu,
+	.sync_single_for_device = xtensa_sync_single_for_device,
+	.sync_sg_for_cpu = xtensa_sync_sg_for_cpu,
+	.sync_sg_for_device = xtensa_sync_sg_for_device,
+	.mapping_error = xtensa_dma_mapping_error,
+};
+EXPORT_SYMBOL(xtensa_dma_map_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init xtensa_dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;
+}
+fs_initcall(xtensa_dma_init);
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index b848cc3dc913..d27b4dcf221f 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -210,10 +210,6 @@ subsys_initcall(pcibios_init);
 
 void pcibios_fixup_bus(struct pci_bus *bus)
 {
-	if (bus->parent) {
-		/* This is a subordinate bridge */
-		pci_read_bridge_bases(bus);
-	}
 }
 
 void pcibios_set_master(struct pci_dev *dev)
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
new file mode 100644
index 000000000000..54f01188c29c
--- /dev/null
+++ b/arch/xtensa/kernel/perf_event.c
@@ -0,0 +1,454 @@
+/*
+ * Xtensa Performance Monitor Module driver
+ * See Tensilica Debug User's Guide for PMU registers documentation.
+ *
+ * Copyright (C) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
+
+/* Global control/status for all perf counters */
+#define XTENSA_PMU_PMG			0x1000
+/* Perf counter values */
+#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
+/* Perf counter control registers */
+#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
+/* Perf counter status registers */
+#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)
+
+#define XTENSA_PMU_PMG_PMEN		0x1
+
+#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
+#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
+
+#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
+#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
+#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
+#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
+#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
+#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
+#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
+
+#define XTENSA_PMU_MASK(select, mask) \
+	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
+	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
+	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
+	 XTENSA_PMU_PMCTRL_INTEN)
+
+#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
+#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
+
+struct xtensa_pmu_events {
+	/* Array of events currently on this core */
+	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
+	/* Bitmap of used hardware counters */
+	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
+};
+static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
+
+static const u32 xtensa_hw_ctl[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
+	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
+	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
+	/* Taken and non-taken branches + taken loop ends */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
+	/* Instruction-related + other global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
+	/* Data-related global stall cycles */
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
+};
+
+#define C(_x) PERF_COUNT_HW_CACHE_##_x
+
+static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
+			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
+		},
+	},
+};
+
+static int xtensa_pmu_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	int ret;
+
+	cache_type = (config >>  0) & 0xff;
+	cache_op = (config >>  8) & 0xff;
+	cache_result = (config >> 16) & 0xff;
+
+	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
+	    cache_op >= C(OP_MAX) ||
+	    cache_result >= C(RESULT_MAX))
+		return -EINVAL;
+
+	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
+
+	if (ret == 0)
+		return -EINVAL;
+
+	return ret;
+}
+
+static inline uint32_t xtensa_pmu_read_counter(int idx)
+{
+	return get_er(XTENSA_PMU_PM(idx));
+}
+
+static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
+{
+	set_er(v, XTENSA_PMU_PM(idx));
+}
+
+static void xtensa_perf_event_update(struct perf_event *event,
+				     struct hw_perf_event *hwc, int idx)
+{
+	uint64_t prev_raw_count, new_raw_count;
+	int64_t delta;
+
+	do {
+		prev_raw_count = local64_read(&hwc->prev_count);
+		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				 new_raw_count) != prev_raw_count);
+
+	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static bool xtensa_perf_event_set_period(struct perf_event *event,
+					 struct hw_perf_event *hwc, int idx)
+{
+	bool rc = false;
+	s64 left;
+
+	if (!is_sampling_event(event)) {
+		left = XTENSA_PMU_COUNTER_MAX;
+	} else {
+		s64 period = hwc->sample_period;
+
+		left = local64_read(&hwc->period_left);
+		if (left <= -period) {
+			left = period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		} else if (left <= 0) {
+			left += period;
+			local64_set(&hwc->period_left, left);
+			hwc->last_period = period;
+			rc = true;
+		}
+		if (left > XTENSA_PMU_COUNTER_MAX)
+			left = XTENSA_PMU_COUNTER_MAX;
+	}
+
+	local64_set(&hwc->prev_count, -left);
+	xtensa_pmu_write_counter(idx, -left);
+	perf_event_update_userpage(event);
+
+	return rc;
+}
+
+static void xtensa_pmu_enable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static void xtensa_pmu_disable(struct pmu *pmu)
+{
+	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
+}
+
+static int xtensa_pmu_event_init(struct perf_event *event)
+{
+	int ret;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
+		    xtensa_hw_ctl[event->attr.config] == 0)
+			return -EINVAL;
+		event->hw.config = xtensa_hw_ctl[event->attr.config];
+		return 0;
+
+	case PERF_TYPE_HW_CACHE:
+		ret = xtensa_pmu_cache_event(event->attr.config);
+		if (ret < 0)
+			return ret;
+		event->hw.config = ret;
+		return 0;
+
+	case PERF_TYPE_RAW:
+		/* Not 'previous counter' select */
+		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
+		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
+			return -EINVAL;
+		event->hw.config = (event->attr.config &
+				    (XTENSA_PMU_PMCTRL_KRNLCNT |
+				     XTENSA_PMU_PMCTRL_TRACELEVEL |
+				     XTENSA_PMU_PMCTRL_SELECT |
+				     XTENSA_PMU_PMCTRL_MASK)) |
+			XTENSA_PMU_PMCTRL_INTEN;
+		return 0;
+
+	default:
+		return -ENOENT;
+	}
+}
+
+/*
+ * Starts/Stops a counter present on the PMU. The PMI handler
+ * should stop the counter when perf_event_overflow() returns
+ * !0. ->start() will be used to continue.
+ */
+static void xtensa_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (WARN_ON_ONCE(idx == -1))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		xtensa_perf_event_set_period(event, hwc, idx);
+	}
+
+	hwc->state = 0;
+
+	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
+}
+
+static void xtensa_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		set_er(0, XTENSA_PMU_PMCTRL(idx));
+		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
+		       XTENSA_PMU_PMSTAT(idx));
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if ((flags & PERF_EF_UPDATE) &&
+	    !(event->hw.state & PERF_HES_UPTODATE)) {
+		xtensa_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+/*
+ * Adds/Removes a counter to/from the PMU, can be done inside
+ * a transaction, see the ->*_txn() methods.
+ */
+static int xtensa_pmu_add(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (__test_and_set_bit(idx, ev->used_mask)) {
+		idx = find_first_zero_bit(ev->used_mask,
+					  XCHAL_NUM_PERF_COUNTERS);
+		if (idx == XCHAL_NUM_PERF_COUNTERS)
+			return -EAGAIN;
+
+		__set_bit(idx, ev->used_mask);
+		hwc->idx = idx;
+	}
+	ev->event[idx] = event;
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		xtensa_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+	return 0;
+}
+
+static void xtensa_pmu_del(struct perf_event *event, int flags)
+{
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+
+	xtensa_pmu_stop(event, PERF_EF_UPDATE);
+	__clear_bit(event->hw.idx, ev->used_mask);
+	perf_event_update_userpage(event);
+}
+
+static void xtensa_pmu_read(struct perf_event *event)
+{
+	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static int callchain_trace(struct stackframe *frame, void *data)
+{
+	struct perf_callchain_entry *entry = data;
+
+	perf_callchain_store(entry, frame->pc);
+	return 0;
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+				callchain_trace, NULL, entry);
+}
+
+void perf_callchain_user(struct perf_callchain_entry *entry,
+			 struct pt_regs *regs)
+{
+	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+			      callchain_trace, entry);
+}
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+	unsigned i;
+
+	local_irq_save(flags);
+	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
+		get_er(XTENSA_PMU_PMG));
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
+		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
+			i, get_er(XTENSA_PMU_PM(i)),
+			i, get_er(XTENSA_PMU_PMCTRL(i)),
+			i, get_er(XTENSA_PMU_PMSTAT(i)));
+	local_irq_restore(flags);
+}
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t rc = IRQ_NONE;
+	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
+	unsigned i;
+
+	for (i = find_first_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS);
+	     i < XCHAL_NUM_PERF_COUNTERS;
+	     i = find_next_bit(ev->used_mask, XCHAL_NUM_PERF_COUNTERS, i + 1)) {
+		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
+		struct perf_event *event = ev->event[i];
+		struct hw_perf_event *hwc = &event->hw;
+		u64 last_period;
+
+		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
+			continue;
+
+		set_er(v, XTENSA_PMU_PMSTAT(i));
+		xtensa_perf_event_update(event, hwc, i);
+		last_period = hwc->last_period;
+		if (xtensa_perf_event_set_period(event, hwc, i)) {
+			struct perf_sample_data data;
+			struct pt_regs *regs = get_irq_regs();
+
+			perf_sample_data_init(&data, 0, last_period);
+			if (perf_event_overflow(event, &data, regs))
+				xtensa_pmu_stop(event, 0);
+		}
+
+		rc = IRQ_HANDLED;
+	}
+	return rc;
+}
+
+static struct pmu xtensa_pmu = {
+	.pmu_enable = xtensa_pmu_enable,
+	.pmu_disable = xtensa_pmu_disable,
+	.event_init = xtensa_pmu_event_init,
+	.add = xtensa_pmu_add,
+	.del = xtensa_pmu_del,
+	.start = xtensa_pmu_start,
+	.stop = xtensa_pmu_stop,
+	.read = xtensa_pmu_read,
+};
+
+static void xtensa_pmu_setup(void)
+{
+	unsigned i;
+
+	set_er(0, XTENSA_PMU_PMG);
+	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
+		set_er(0, XTENSA_PMU_PMCTRL(i));
+		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
+	}
+}
+
+static int xtensa_pmu_notifier(struct notifier_block *self,
+			       unsigned long action, void *data)
+{
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_STARTING:
+		xtensa_pmu_setup();
+		break;
+
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init xtensa_pmu_init(void)
+{
+	int ret;
+	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
+
+	perf_cpu_notifier(xtensa_pmu_notifier);
+#if XTENSA_FAKE_NMI
+	enable_irq(irq);
+#else
+	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
+			  "pmu", NULL);
+	if (ret < 0)
+		return ret;
+#endif
+
+	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
+	if (ret)
+		free_irq(irq, NULL);
+
+	return ret;
+}
+early_initcall(xtensa_pmu_init);
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 7d2c317bd98b..7538d802b65a 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -1,11 +1,12 @@
 /*
- * arch/xtensa/kernel/stacktrace.c
+ * Kernel and userspace stack tracing.
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
  * Copyright (C) 2001 - 2013 Tensilica Inc.
+ * Copyright (C) 2015 Cadence Design Systems Inc.
  */
 #include <linux/export.h>
 #include <linux/sched.h>
@@ -13,6 +14,170 @@
 
 #include <asm/stacktrace.h>
 #include <asm/traps.h>
+#include <asm/uaccess.h>
+
+#if IS_ENABLED(CONFIG_OPROFILE) || IS_ENABLED(CONFIG_PERF_EVENTS)
+
+/* Address of common_exception_return, used to check the
+ * transition from kernel to user space.
+ */
+extern int common_exception_return;
+
+/* A struct that maps to the part of the frame containing the a0 and
+ * a1 registers.
+ */
+struct frame_start {
+	unsigned long a0;
+	unsigned long a1;
+};
+
+void xtensa_backtrace_user(struct pt_regs *regs, unsigned int depth,
+			   int (*ufn)(struct stackframe *frame, void *data),
+			   void *data)
+{
+	unsigned long windowstart = regs->windowstart;
+	unsigned long windowbase = regs->windowbase;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+	unsigned long pc = regs->pc;
+	struct stackframe frame;
+	int index;
+
+	if (!depth--)
+		return;
+
+	frame.pc = pc;
+	frame.sp = a1;
+
+	if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+		return;
+
+	/* Two steps:
+	 *
+	 * 1. Look through the register window for the
+	 * previous PCs in the call trace.
+	 *
+	 * 2. Look on the stack.
+	 */
+
+	/* Step 1.  */
+	/* Rotate WINDOWSTART to move the bit corresponding to
+	 * the current window to the bit #0.
+	 */
+	windowstart = (windowstart << WSBITS | windowstart) >> windowbase;
+
+	/* Look for bits that are set, they correspond to
+	 * valid windows.
+	 */
+	for (index = WSBITS - 1; (index > 0) && depth; depth--, index--)
+		if (windowstart & (1 << index)) {
+			/* Get the PC from a0 and a1. */
+			pc = MAKE_PC_FROM_RA(a0, pc);
+			/* Read a0 and a1 from the
+			 * corresponding position in AREGs.
+			 */
+			a0 = regs->areg[index * 4];
+			a1 = regs->areg[index * 4 + 1];
+
+			frame.pc = pc;
+			frame.sp = a1;
+
+			if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+				return;
+		}
+
+	/* Step 2. */
+	/* We are done with the register window, we need to
+	 * look through the stack.
+	 */
+	if (!depth)
+		return;
+
+	/* Start from the a1 register. */
+	/* a1 = regs->areg[1]; */
+	while (a0 != 0 && depth--) {
+		struct frame_start frame_start;
+		/* Get the location for a1, a0 for the
+		 * previous frame from the current a1.
+		 */
+		unsigned long *psp = (unsigned long *)a1;
+
+		psp -= 4;
+
+		/* Check if the region is OK to access. */
+		if (!access_ok(VERIFY_READ, psp, sizeof(frame_start)))
+			return;
+		/* Copy a1, a0 from user space stack frame. */
+		if (__copy_from_user_inatomic(&frame_start, psp,
+					      sizeof(frame_start)))
+			return;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = frame_start.a0;
+		a1 = frame_start.a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (pc == 0 || pc >= TASK_SIZE || ufn(&frame, data))
+			return;
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_user);
+
+void xtensa_backtrace_kernel(struct pt_regs *regs, unsigned int depth,
+			     int (*kfn)(struct stackframe *frame, void *data),
+			     int (*ufn)(struct stackframe *frame, void *data),
+			     void *data)
+{
+	unsigned long pc = regs->depc > VALID_DOUBLE_EXCEPTION_ADDRESS ?
+		regs->depc : regs->pc;
+	unsigned long sp_start, sp_end;
+	unsigned long a0 = regs->areg[0];
+	unsigned long a1 = regs->areg[1];
+
+	sp_start = a1 & ~(THREAD_SIZE - 1);
+	sp_end = sp_start + THREAD_SIZE;
+
+	/* Spill the register window to the stack first. */
+	spill_registers();
+
+	/* Read the stack frames one by one and create the PC
+	 * from the a0 and a1 registers saved there.
+	 */
+	while (a1 > sp_start && a1 < sp_end && depth--) {
+		struct stackframe frame;
+		unsigned long *psp = (unsigned long *)a1;
+
+		frame.pc = pc;
+		frame.sp = a1;
+
+		if (kernel_text_address(pc) && kfn(&frame, data))
+			return;
+
+		if (pc == (unsigned long)&common_exception_return) {
+			regs = (struct pt_regs *)a1;
+			if (user_mode(regs)) {
+				if (ufn == NULL)
+					return;
+				xtensa_backtrace_user(regs, depth, ufn, data);
+				return;
+			}
+			a0 = regs->areg[0];
+			a1 = regs->areg[1];
+			continue;
+		}
+
+		sp_start = a1;
+
+		pc = MAKE_PC_FROM_RA(a0, pc);
+		a0 = *(psp - 4);
+		a1 = *(psp - 3);
+	}
+}
+EXPORT_SYMBOL(xtensa_backtrace_kernel);
+
+#endif
 
 void walk_stackframe(unsigned long *sp,
 		int (*fn)(struct stackframe *frame, void *data),
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 2a1823de69cc..b97767dbc7c8 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -52,8 +52,6 @@ static struct clocksource ccount_clocksource = {
 
 static int ccount_timer_set_next_event(unsigned long delta,
 		struct clock_event_device *dev);
-static void ccount_timer_set_mode(enum clock_event_mode mode,
-		struct clock_event_device *evt);
 struct ccount_timer {
 	struct clock_event_device evt;
 	int irq_enabled;
@@ -77,35 +75,34 @@ static int ccount_timer_set_next_event(unsigned long delta,
 	return ret;
 }
 
-static void ccount_timer_set_mode(enum clock_event_mode mode,
-		struct clock_event_device *evt)
+/*
+ * There is no way to disable the timer interrupt at the device level,
+ * only at the intenable register itself. Since enable_irq/disable_irq
+ * calls are nested, we need to make sure that these calls are
+ * balanced.
+ */
+static int ccount_timer_shutdown(struct clock_event_device *evt)
+{
+	struct ccount_timer *timer =
+		container_of(evt, struct ccount_timer, evt);
+
+	if (timer->irq_enabled) {
+		disable_irq(evt->irq);
+		timer->irq_enabled = 0;
+	}
+	return 0;
+}
+
+static int ccount_timer_set_oneshot(struct clock_event_device *evt)
 {
 	struct ccount_timer *timer =
 		container_of(evt, struct ccount_timer, evt);
 
-	/*
-	 * There is no way to disable the timer interrupt at the device level,
-	 * only at the intenable register itself. Since enable_irq/disable_irq
-	 * calls are nested, we need to make sure that these calls are
-	 * balanced.
-	 */
-	switch (mode) {
-	case CLOCK_EVT_MODE_SHUTDOWN:
-	case CLOCK_EVT_MODE_UNUSED:
-		if (timer->irq_enabled) {
-			disable_irq(evt->irq);
-			timer->irq_enabled = 0;
-		}
-		break;
-	case CLOCK_EVT_MODE_RESUME:
-	case CLOCK_EVT_MODE_ONESHOT:
-		if (!timer->irq_enabled) {
-			enable_irq(evt->irq);
-			timer->irq_enabled = 1;
-		}
-	default:
-		break;
+	if (!timer->irq_enabled) {
+		enable_irq(evt->irq);
+		timer->irq_enabled = 1;
 	}
+	return 0;
 }
 
 static irqreturn_t timer_interrupt(int irq, void *dev_id);
@@ -126,7 +123,9 @@ void local_timer_setup(unsigned cpu)
 	clockevent->features = CLOCK_EVT_FEAT_ONESHOT;
 	clockevent->rating = 300;
 	clockevent->set_next_event = ccount_timer_set_next_event;
-	clockevent->set_mode = ccount_timer_set_mode;
+	clockevent->set_state_shutdown = ccount_timer_shutdown;
+	clockevent->set_state_oneshot = ccount_timer_set_oneshot;
+	clockevent->tick_resume = ccount_timer_set_oneshot;
 	clockevent->cpumask = cpumask_of(cpu);
 	clockevent->irq = irq_create_mapping(NULL, LINUX_TIMER_INT);
 	if (WARN(!clockevent->irq, "error: can't map timer irq"))
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 9d2f45f010ef..42d441f7898b 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -62,6 +62,7 @@ extern void fast_coprocessor(void);
 
 extern void do_illegal_instruction (struct pt_regs*);
 extern void do_interrupt (struct pt_regs*);
+extern void do_nmi(struct pt_regs *);
 extern void do_unaligned_user (struct pt_regs*);
 extern void do_multihit (struct pt_regs*, unsigned long);
 extern void do_page_fault (struct pt_regs*, unsigned long);
@@ -146,6 +147,9 @@ COPROCESSOR(6),
 #if XTENSA_HAVE_COPROCESSOR(7)
 COPROCESSOR(7),
 #endif
+#if XTENSA_FAKE_NMI
+{ EXCCAUSE_MAPPED_NMI,			0,		do_nmi },
+#endif
 { EXCCAUSE_MAPPED_DEBUG,		0,		do_debug },
 { -1, -1, 0 }
 
@@ -199,6 +203,28 @@ void do_multihit(struct pt_regs *regs, unsigned long exccause)
 
 extern void do_IRQ(int, struct pt_regs *);
 
+#if XTENSA_FAKE_NMI
+
+irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id);
+
+DEFINE_PER_CPU(unsigned long, nmi_count);
+
+void do_nmi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL)
+		trace_hardirqs_off();
+
+	old_regs = set_irq_regs(regs);
+	nmi_enter();
+	++*this_cpu_ptr(&nmi_count);
+	xtensa_pmu_irq_handler(0, NULL);
+	nmi_exit();
+	set_irq_regs(old_regs);
+}
+#endif
+
 void do_interrupt(struct pt_regs *regs)
 {
 	static const unsigned int_level_mask[] = {
@@ -211,8 +237,11 @@ void do_interrupt(struct pt_regs *regs)
 		XCHAL_INTLEVEL6_MASK,
 		XCHAL_INTLEVEL7_MASK,
 	};
-	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct pt_regs *old_regs;
+
+	trace_hardirqs_off();
 
+	old_regs = set_irq_regs(regs);
 	irq_enter();
 
 	for (;;) {
diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S
index 1b397a902292..abcdb527f18a 100644
--- a/arch/xtensa/kernel/vectors.S
+++ b/arch/xtensa/kernel/vectors.S
@@ -627,7 +627,11 @@ ENTRY(_Level\level\()InterruptVector)
 	wsr	a0, excsave2
 	rsr	a0, epc\level
 	wsr	a0, epc1
+	.if	\level <= LOCKLEVEL
 	movi	a0, EXCCAUSE_LEVEL1_INTERRUPT
+	.else
+	movi	a0, EXCCAUSE_MAPPED_NMI
+	.endif
 	wsr	a0, exccause
 	rsr	a0, eps\level
 					# branch to user or kernel vector
@@ -682,11 +686,13 @@ ENDPROC(_WindowOverflow4)
 	.align 4
 _SimulateUserKernelVectorException:
 	addi	a0, a0, (1 << PS_EXCM_BIT)
+#if !XTENSA_FAKE_NMI
 	wsr	a0, ps
+#endif
 	bbsi.l	a0, PS_UM_BIT, 1f	# branch if user mode
-	rsr	a0, excsave2		# restore a0
+	xsr	a0, excsave2		# restore a0
 	j	_KernelExceptionVector	# simulate kernel vector exception
-1:	rsr	a0, excsave2		# restore a0
+1:	xsr	a0, excsave2		# restore a0
 	j	_UserExceptionVector	# simulate user vector exception
 #endif