From b2f680380ddf2f003882e59e00acd6c1952f91fc Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Wed, 9 Mar 2016 15:05:56 -0500 Subject: x86/mm/32: Add support for 64-bit __get_user() on 32-bit kernels The existing __get_user() implementation does not support fetching 64-bit values on 32-bit x86. Implement this in a way that does not generate any incorrect warnings as cautioned by Russell King. Test code available at: http://www.kvack.org/~bcrl/x86_32-get_user.tar . Signed-off-by: Benjamin LaHaise Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a969ae607be8..8b3fb76b489b 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -333,7 +333,26 @@ do { \ } while (0) #ifdef CONFIG_X86_32 -#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_u64(x, ptr, retval, errret) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + asm volatile(ASM_STAC "\n" \ + "1: movl %2,%%eax\n" \ + "2: movl %3,%%edx\n" \ + "3: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "4: mov %4,%0\n" \ + " xorl %%eax,%%eax\n" \ + " xorl %%edx,%%edx\n" \ + " jmp 3b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (retval), "=A"(x) \ + : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ + "i" (errret), "0" (retval)); \ +}) + #define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ @@ -420,7 +439,7 @@ do { \ #define __get_user_nocheck(x, ptr, size) \ ({ \ int __gu_err; \ - unsigned long __gu_val; \ + __inttype(*(ptr)) __gu_val; \ __uaccess_begin(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ __uaccess_end(); \ -- cgit v1.2.3 From 3282e6b8f89eaeaf4915ee6cc57bcf06d1d6cead Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 4 May 2016 17:50:59 +0100 Subject: x86/topology: Remove redundant ENABLE_TOPO_DEFINES Commit c8e56d20f2d1 ("x86: Kill CONFIG_X86_HT") removed CONFIG_X86_HT and defined ENABLE_TOPO_DEFINES always if CONFIG_SMP, which makes ENABLE_TOPO_DEFINES redundant. This patch removes the redundant ENABLE_TOPO_DEFINES and instead uses CONFIG_SMP directly Signed-off-by: Sudeep Holla Acked-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462380659-5968-1-git-send-email-sudeep.holla@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..c9a4ed73aef4 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -25,16 +25,6 @@ #ifndef _ASM_X86_TOPOLOGY_H #define _ASM_X86_TOPOLOGY_H -#ifdef CONFIG_X86_32 -# ifdef CONFIG_SMP -# define ENABLE_TOPO_DEFINES -# endif -#else -# ifdef CONFIG_SMP -# define ENABLE_TOPO_DEFINES -# endif -#endif - /* * to preserve the visibility of NUMA_NO_NODE definition, * moved to there from here. 
May be used independent of @@ -123,7 +113,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) -#ifdef ENABLE_TOPO_DEFINES +#ifdef CONFIG_SMP #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -- cgit v1.2.3 From f0133acc7d4835cfbb86393b7d2a4fba7519585b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 8 May 2016 20:58:40 +0200 Subject: x86/cpu: Correct comments and messages in P4 erratum 037 handling code Remove the linebreak in the conditional and s/errata/erratum/ as the singular is "erratum". No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1462733920-7224-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f71a34944b56..5354080f76c3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -263,15 +263,14 @@ static void intel_workarounds(struct cpuinfo_x86 *c) } /* - * P4 Xeon errata 037 workaround. + * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) - > 0) { + MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); - pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); + pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n"); } } -- cgit v1.2.3 From 67d7a982bab6702d84415ea889996fae72a7d3b2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 10 May 2016 23:07:02 +0200 Subject: x86/extable: Ensure entries are swapped completely when sorting The x86 exception table sorting was changed in this recent commit: 29934b0fb8ff ("x86/extable: use generic search and sort routines") ... to use the arch independent code in lib/extable.c. However, the patch was mangled somehow on its way into the kernel from the last version posted at: https://lkml.org/lkml/2016/1/27/232 The committed version kind of attempted to incorporate the changes of contemporary commit done in the x86 tree: 548acf19234d ("x86/mm: Expand the exception table logic to allow new handling options") ... as in _completely_ _ignoring_ the x86 specific 'handler' member of struct exception_table_entry. This effectively broke the sorting as entries will only be partly swapped now. Fortunately, the x86 Kconfig selects BUILDTIME_EXTABLE_SORT, so the exception table doesn't need to be sorted at runtime. However, in case that ever changes, we better not break the exception table sorting just because of that. Fix this by providing a swap_ex_entry_fixup() macro that takes care of the 'handler' member. Signed-off-by: Mathias Krause Reviewed-by: Ard Biesheuvel Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1462914422-2911-1-git-send-email-minipli@googlemail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8b3fb76b489b..86c48f359686 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -108,6 +108,14 @@ struct exception_table_entry { #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ + do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->handler = (b)->handler + (delta); \ + (b)->handler = (tmp).handler - (delta); \ + } while (0) + extern int fixup_exception(struct pt_regs *regs, int trapnr); extern bool ex_has_fault_handler(unsigned long ip); extern int early_fixup_exception(unsigned long *ip); -- cgit v1.2.3 From 20f362785869196fb61a76661a48321169a9046e Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Mon, 16 May 2016 23:16:18 +0200 Subject: perf/x86/intel: Add 'static' keyword to locally used arrays Add the 'static' keyword to intel_bdw_event_constraints[], snb_events_attrs[], nhm_events_attrs[] and intel_skl_event_constraints arrays[], because they are only used locally. Signed-off-by: Lukasz Odzioba Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: kan.liang@intel.com Cc: lukasz.anaczkowski@intel.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/1463433378-16816-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..ad08caf1a1b6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -177,7 +177,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -225,12 +225,12 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), NULL, @@ -258,7 +258,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ -- cgit v1.2.3 From 9c489fce7a4a46c8a408e16e126bf3225401c7b5 Mon 
Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Mon, 16 May 2016 23:16:59 +0200 Subject: perf/x86/intel: Change offcore response masks for Knights Landing Due to change in register definition we need to update OCR mask: MSR_OFFCORE_RESP0 reserved bits: 3,4,18,29,30,33,34, 8,11,14 MSR_OFFCORE_RESP1 reserved bits: 3,4,18,29,30,33,34, 38 Reported-by: Andi Kleen Signed-off-by: Lukasz Odzioba Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: kan.liang@intel.com Cc: lukasz.anaczkowski@intel.com Cc: zheng.z.yan@intel.com Link: http://lkml.kernel.org/r/1463433419-16893-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ad08caf1a1b6..0941f846cc71 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -186,10 +186,8 @@ static struct event_constraint intel_skl_event_constraints[] = { }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; -- cgit v1.2.3 From a54fa07930c0f7db55ecb4cc16b86d74101332c0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 15 May 2016 23:18:24 -0700 Subject: perf/x86/intel/uncore: Locate specific box by checking full device info Some platforms, e.g. Knights Landing, use a common PCI device ID for multiple instances of an uncore PMU device type. So it is impossible to locate the specific instances only by PCI device ID. The current code specially handles Knights Landing by arbitrarily pointing an instance to an unused uncore box. However, we still have no idea which uncore device is mapped to which box. Furthermore, there could be more platforms which use a common PCI device ID for uncore devices. We have to specially handle them one by one. This patch records full device information (slot, func, and device ID) in id_table[]. So the probe function can point the instance to a specific uncore box by checking the full device information. 
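For illustration, a minimal userspace sketch of the driver_data bit layout this patch introduces (it mirrors the UNCORE_PCI_DEV_FULL_DATA()/UNCORE_PCI_DEV_DEV()/UNCORE_PCI_DEV_FUNC() helpers added below; the type value is an illustrative stand-in for the real enum, and the program itself is not kernel code):

/*
 * Sketch of the driver_data encoding: PCI slot, function, PMU type and
 * box index packed into one word, mirroring the helpers added below.
 */
#include <stdint.h>
#include <stdio.h>

#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \
        (((dev) << 24) | ((func) << 16) | ((type) << 8) | (idx))
#define UNCORE_PCI_DEV_DEV(data)        (((data) >> 24) & 0xff)
#define UNCORE_PCI_DEV_FUNC(data)       (((data) >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data)       (((data) >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data)        ((data) & 0xff)

#define EXAMPLE_TYPE_MC_DCLK    2  /* illustrative stand-in for KNL_PCI_UNCORE_MC_DCLK */

int main(void)
{
        /* MC1 DClk channel 2 sits at slot 9, function 4, box index 5 */
        uint32_t data = UNCORE_PCI_DEV_FULL_DATA(9, 4, EXAMPLE_TYPE_MC_DCLK, 5);

        printf("dev=%u func=%u type=%u idx=%u\n",
               UNCORE_PCI_DEV_DEV(data), UNCORE_PCI_DEV_FUNC(data),
               UNCORE_PCI_DEV_TYPE(data), UNCORE_PCI_DEV_IDX(data));
        return 0;
}

With slot and function encoded, the probe routine can compare PCI_DEVFN(dev, func) against pdev->devfn and pick the right box, which is what the uncore_pci_probe() hunk below does.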
Tested-by: Lukasz Odzioba Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Acked-by: tglx@linutronix.de Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@suse.de Cc: harish.chegondi@intel.com Cc: hubert.chrzaniuk@intel.com Cc: lawrence.f.meadows@intel.com Link: http://lkml.kernel.org/r/1463379504-39003-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 43 +++++++++++----- arch/x86/events/intel/uncore.h | 4 ++ arch/x86/events/intel/uncore_snbep.c | 96 +++++++++++++++++++++++++++++++++--- 3 files changed, 122 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..65490589e52e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -882,7 +882,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +903,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. - */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. 
+ */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..66c3a3657a10 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b2625867ebd1..7336e55c248c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), -- cgit v1.2.3 From 70b8301f6b8f7bc053377a9cbd0c4e42e29d9807 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:55 -0700 Subject: x86/topology: Add topology_max_smt_threads() For SMT specific workarounds it is useful to know if SMT is active on any online CPU in the system. This currently requires a loop over all online CPUs. Add a global variable that is updated with the maximum number of smt threads on any CPU on online/offline, and use it for topology_max_smt_threads() The single call is easier to use than a loop. Not exported to user space because user space already can use the existing sibling interfaces to find this out. 
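As a sketch of the existing sibling interface the last paragraph refers to, the following userspace program derives the same maximum by counting bits in each CPU's thread_siblings mask (it assumes the usual /sys/devices/system/cpu/cpuN/topology layout and is only an illustration, not part of the patch):

/*
 * Userspace approximation of topology_max_smt_threads(), built on the
 * sysfs sibling masks that user space can already read.
 */
#include <ctype.h>
#include <stdio.h>

static int count_mask_bits(FILE *f)
{
        int c, bits = 0;

        while ((c = fgetc(f)) != EOF) {
                if (!isxdigit(c))
                        continue;       /* skip ',' separators and newline */
                int v = isdigit(c) ? c - '0' : tolower(c) - 'a' + 10;
                bits += __builtin_popcount(v);
        }
        return bits;
}

int main(void)
{
        int cpu, max_threads = 0;

        for (cpu = 0; ; cpu++) {
                char path[128];
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
                         cpu);
                f = fopen(path, "r");
                if (!f)
                        break;          /* no more CPUs */
                int threads = count_mask_bits(f);  /* siblings incl. this CPU */
                fclose(f);
                if (threads > max_threads)
                        max_threads = threads;
        }
        printf("max SMT threads per core: %d\n", max_threads);
        return 0;
}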
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-2-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/topology.h | 9 +++++++++ arch/x86/kernel/smpboot.c | 25 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..e346572841a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2ed0ec1353f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) -- cgit v1.2.3 From fc07e9f983b4b11922c22b6cccadc1f342f05a4c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:56 -0700 Subject: perf/x86: Support 
sysfs files depending on SMT status Add a way to show different sysfs events attributes depending on HyperThreading is on or off. This is difficult to determine early at boot, so we just do it dynamically when the sysfs attribute is read. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 23 +++++++++++++++++++++++ arch/x86/events/perf_event.h | 10 ++++++++++ include/linux/perf_event.h | 7 +++++++ 3 files changed, 40 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..929655db5084 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? 
+ pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..e2d7285a2dac 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 92e9ce737432..a7593d653b40 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1334,6 +1334,13 @@ struct perf_pmu_events_attr { const char *event_str; }; +struct perf_pmu_events_ht_attr { + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; +}; + ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); -- cgit v1.2.3 From a39fcae7a83629312cc06cee7a745b9a8203327f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:57 -0700 Subject: perf/x86/intel: Add topdown events to Intel Core Add declarations for the events needed for topdown to the Intel big core CPUs starting with Sandy Bridge. We need to report different values if HyperThreading is on or off. The only thing this patch does is to export some events in sysfs. topdown level 1 uses a set of abstracted metrics which are generic to out of order CPU cores (although some CPUs may not implement all of them): topdown-total-slots Available slots in the pipeline topdown-slots-issued Slots issued into the pipeline topdown-slots-retired Slots successfully retired topdown-fetch-bubbles Pipeline gaps in the frontend topdown-recovery-bubbles Pipeline gaps during recovery from misspeculation A slot is a single operation in the CPU pipe line. These metrics then allow to compute four useful metrics: FrontendBound, BackendBound, Retiring, BadSpeculation. The formulas to compute the metrics are generic, they only change based on the availability on the abstracted input values. The kernel declares the events supported by the current CPU and their scaling factors (such as the pipeline width) and perf stat then computes the formulas based on the available metrics. This is similar how existing perf metrics, such as TSC metrics or IPC, are implemented. This abstracts all CPU pipe line specific knowledge in the kernel driver, but still avoids the need for larger scale perf interface changes. For HyperThreading the any bit is needed to get accurate values when both threads are executing. This implies that the events can only be collected as root or with perf_event_paranoid=-1 for now. 
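To make the generic formulas concrete, here is a minimal sketch of the level-1 computation the perf tool performs from the five abstracted counts (the formulas are the ones from the paper cited below; the sample numbers are invented purely for illustration):

/*
 * Level-1 Top-Down breakdown from the five abstracted slot counts
 * listed above; a sketch of the user-space side, not patch code.
 */
#include <stdio.h>

struct topdown_slots {
        double total;            /* topdown-total-slots */
        double issued;           /* topdown-slots-issued */
        double retired;          /* topdown-slots-retired */
        double fetch_bubbles;    /* topdown-fetch-bubbles */
        double recovery_bubbles; /* topdown-recovery-bubbles */
};

static void topdown_level1(const struct topdown_slots *s)
{
        double frontend = s->fetch_bubbles / s->total;
        double bad_spec = (s->issued - s->retired + s->recovery_bubbles) / s->total;
        double retiring = s->retired / s->total;
        double backend  = 1.0 - frontend - bad_spec - retiring;

        printf("FrontendBound %.1f%%  BadSpeculation %.1f%%  "
               "Retiring %.1f%%  BackendBound %.1f%%\n",
               100 * frontend, 100 * bad_spec, 100 * retiring, 100 * backend);
}

int main(void)
{
        /* made-up sample counts, purely for illustration */
        struct topdown_slots s = {
                .total = 4e9, .issued = 2.2e9, .retired = 2.0e9,
                .fetch_bubbles = 0.8e9, .recovery_bubbles = 0.1e9,
        };
        topdown_level1(&s);
        return 0;
}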
The basic scheme is based on the following paper: Yasin, A Top Down Method for Performance analysis and Counter architecture ISPASS14 (pdf available via google) Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0941f846cc71..4f51bc44b89f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -228,9 +228,46 @@ static struct attribute *nhm_events_attrs[] = { NULL, }; +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). + * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -3435,6 +3472,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3803,6 +3847,12 @@ __init int intel_pmu_init(void) memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = 
intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; -- cgit v1.2.3 From eb12b8ece71cfd4c96df37198b9903fc639768d8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:58 -0700 Subject: perf/x86/intel: Add topdown events to Intel Atom Add topdown event declarations to Silvermont / Airmont. These cores do not support the full Top Down metrics, but an useful subset (FrontendBound, Retiring, Backend Bound/Bad Speculation). The perf stat tool automatically handles the missing events and combines the available metrics. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-5-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4f51bc44b89f..593b1676b5d1 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1367,6 +1367,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3629,6 +3652,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; -- cgit v1.2.3 From 030ba6cd105c68ce919c5e239853b567490cd059 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 19 May 2016 17:09:59 -0700 Subject: perf/x86/intel: Use new topology_max_smt_threads() in HT leak workaround Now that we have topology_max_smt_threads() use it to detect the HT workarounds for older CPUs. 
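A userspace sketch of the pattern this pair of patches relies on: the maximum is recomputed only when CPUs go online or offline (recompute_smt_state() in the earlier patch), so call sites like this workaround get a single cheap check instead of a cpumask walk at every call (the CPU counts below are made up for illustration):

/*
 * Keep a cached maximum, recomputed only on hotplug events, so
 * "is SMT active anywhere?" is a single load at the call site.
 */
#include <stdio.h>

#define NR_CPUS 8

static int online[NR_CPUS]      = { 1, 1, 1, 1, 1, 1, 1, 1 };
static int smt_threads[NR_CPUS] = { 2, 2, 2, 2, 2, 2, 2, 2 };
static int max_smt_threads;

/* analogue of recompute_smt_state(): run on online/offline events */
static void recompute_smt_state(void)
{
        int cpu, max = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (online[cpu] && smt_threads[cpu] > max)
                        max = smt_threads[cpu];
        max_smt_threads = max;
}

/* analogue of topology_max_smt_threads(): cheap at every call site */
static int topology_max_smt_threads(void)
{
        return max_smt_threads;
}

int main(void)
{
        recompute_smt_state();
        printf("HT workaround needed: %s\n",
               topology_max_smt_threads() > 1 ? "yes" : "no");
        return 0;
}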
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1463703002-19686-6-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 593b1676b5d1..5081b4cdad0d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3989,16 +3989,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } -- cgit v1.2.3 From cab43282682e0f46d6a74dd4f54f52595af5eefa Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Wed, 8 Jun 2016 12:15:11 +0800 Subject: ARM: at91/dt: sama5d2: Use new compatible for ohci node Use compatible "atmel,sama5d2-ohci" to be capable of suspending ports while sleep to save the power consumption. Signed-off-by: Wenyou Yang Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 2827e7ab5ebc..5dd2734e67ba 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -232,7 +232,7 @@ }; usb1: ohci@00400000 { - compatible = "atmel,at91rm9200-ohci", "usb-ohci"; + compatible = "atmel,sama5d2-ohci", "usb-ohci"; reg = <0x00400000 0x100000>; interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; -- cgit v1.2.3 From ef5f9f47d4ec4cf42bac48c7c4dafacc1b9f0630 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:29 -0700 Subject: perf/x86/intel: Use Intel family macros for core perf events Use the new model number macros instead of spelling things out in the comments. Note that this is missing a Nehalem model that is mentioned in intel_idle which is fixed up in a later patch. The resulting binary (arch/x86/events/intel/core.o) is exactly the same with and without this patch modulo some harmless changes to restoring %esi in the return path of functions, even those untouched by this patch. Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001929.C5F1C079@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 87 ++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 5081b4cdad0d..3ed528c2370c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -16,6 +16,7 @@ #include #include +#include #include #include "../perf_event.h" @@ -3319,11 +3320,11 @@ static int intel_snb_pebs_broken(int cpu) u32 rev = UINT_MAX; /* default to broken for unknown models */ switch (cpu_data(cpu).x86_model) { - case 42: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE: rev = 0x28; break; - case 45: /* SNB-EP */ + case INTEL_FAM6_SANDYBRIDGE_X: switch (cpu_data(cpu).x86_mask) { case 6: rev = 0x618; break; case 7: rev = 0x70c; break; @@ -3573,15 +3574,15 @@ __init int intel_pmu_init(void) * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { - case 14: /* 65nm Core "Yonah" */ + case INTEL_FAM6_CORE_YONAH: pr_cont("Core events, "); break; - case 15: /* 65nm Core2 "Merom" */ + case INTEL_FAM6_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); - case 22: /* 65nm Core2 "Merom-L" */ - case 23: /* 45nm Core2 "Penryn" */ - case 29: /* 45nm Core2 "Dunnington (MP) */ + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3592,9 +3593,9 @@ __init int intel_pmu_init(void) pr_cont("Core2 events, "); break; - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3622,11 +3623,11 @@ __init int intel_pmu_init(void) pr_cont("Nehalem events, "); break; - case 28: /* 45nm Atom "Pineview" */ - case 38: /* 45nm Atom "Lincroft" */ - case 39: /* 32nm Atom "Penwell" */ - case 53: /* 32nm Atom "Cloverview" */ - case 54: /* 32nm Atom "Cedarview" */ + case INTEL_FAM6_ATOM_PINEVIEW: + case INTEL_FAM6_ATOM_LINCROFT: + case INTEL_FAM6_ATOM_PENWELL: + case INTEL_FAM6_ATOM_CLOVERVIEW: + case INTEL_FAM6_ATOM_CEDARVIEW: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3638,9 +3639,9 @@ __init int intel_pmu_init(void) pr_cont("Atom events, "); break; - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -3656,8 +3657,8 @@ __init int intel_pmu_init(void) pr_cont("Silvermont events, "); break; - case 92: /* 14nm Atom "Goldmont" */ - case 95: /* 14nm Atom "Goldmont Denverton" */ + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -3680,9 +3681,9 @@ __init int intel_pmu_init(void) pr_cont("Goldmont 
events, "); break; - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -3709,8 +3710,8 @@ __init int intel_pmu_init(void) pr_cont("Westmere events, "); break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -3723,7 +3724,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == 45) + if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3745,8 +3746,8 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3762,7 +3763,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == 62) + if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -3780,10 +3781,10 @@ __init int intel_pmu_init(void) break; - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: x86_add_quirk(intel_ht_bug); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -3807,10 +3808,10 @@ __init int intel_pmu_init(void) pr_cont("Haswell events, "); break; - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -3843,7 +3844,7 @@ __init int intel_pmu_init(void) pr_cont("Broadwell events, "); break; - case 87: /* Knights Landing Xeon Phi */ + case INTEL_FAM6_XEON_PHI_KNL: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3861,11 +3862,11 @@ __init int intel_pmu_init(void) pr_cont("Knights Landing events, "); break; - case 142: /* 14nm Kabylake Mobile */ - case 158: /* 14nm Kabylake Desktop */ - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - case 85: /* 14nm Skylake Server */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case 
INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); -- cgit v1.2.3 From 7f2236d0bf9a33bb539551b653ae842430654240 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:30 -0700 Subject: perf/x86/rapl: Use Intel family macros for RAPL Use the new INTEL_FAM6_* macros for rapl.c. Note that this is missing at least one Westmere model and Skylake Server which will we fixed later in this series. The resulting binary structure 'rapl_cpu_match' is the same before and after this patch. Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001930.6AC50BE3@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index e30eef4f29a6..8012fe6c7c8b 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,6 +55,7 @@ #include #include #include +#include #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -786,26 +787,26 @@ static const struct intel_rapl_init_fun skl_rapl_init __initconst = { }; static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ - X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ - X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ - X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ - X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ - X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ - X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ - X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ - X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */ - X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */ + 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), {}, }; -- cgit v1.2.3 From 353bf605a771e3c86b21de017e9525aba7d64770 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:33 -0700 Subject: perf/x86/msr: Use Intel family macros for MSR events code Use the new INTEL_MODEL_* macros for arch/x86/events/msr.c. This code appears to be missing handling for "WESTMERE2" and "SKYLAKE_X". Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001933.99A402B0@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 85ef3c2e80e0..83cf13e368cd 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,4 +1,5 @@ #include +#include enum perf_msr_id { PERF_MSR_TSC = 0, @@ -34,39 +35,39 @@ static bool test_intel(int idx) return false; switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ + case INTEL_FAM6_ATOM_SILVERMONT1: + case INTEL_FAM6_ATOM_SILVERMONT2: + case INTEL_FAM6_ATOM_AIRMONT: if (idx == PERF_MSR_SMI) return true; break; - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; -- cgit v1.2.3 From 5134596caee9e834d2486edc45efad4c9e6effc3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:35 -0700 Subject: perf/x86/msr: Add missing Intel models This patch presumes that Kabylake and Skylake Server will be the same as the existing Skylake parts and adds them to the MSR events code. Also add handling for "WESTMERE2". 
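All of these model tables key off boot_cpu_data.x86_model; as background, a hedged userspace sketch of how that number is derived from CPUID leaf 1 follows (it uses GCC's <cpuid.h>; the Skylake example in the comment matches the tables above, everything else is illustrative and not part of the patch):

/*
 * Derive the display family/model from CPUID leaf 1.  Family 6 parts
 * combine the model and extended model fields, which is why the
 * INTEL_FAM6_* names map to values like 94.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;

        unsigned int family = (eax >> 8) & 0xf;
        unsigned int model  = (eax >> 4) & 0xf;

        if (family == 0xf)
                family += (eax >> 20) & 0xff;           /* extended family */
        if (family == 0x6 || family == 0xf)
                model |= ((eax >> 16) & 0xf) << 4;      /* extended model */

        /* e.g. family 6, model 94 is what INTEL_FAM6_SKYLAKE_DESKTOP names */
        printf("family %u, model %u (0x%x)\n", family, model, model);
        return 0;
}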
Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001935.FE6B3847@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 83cf13e368cd..50b3a056f96b 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -40,6 +40,7 @@ static bool test_intel(int idx) case INTEL_FAM6_NEHALEM_EX: case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE2: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: @@ -68,6 +69,9 @@ static bool test_intel(int idx) case INTEL_FAM6_SKYLAKE_MOBILE: case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; -- cgit v1.2.3 From bf4ad54199333d10c212499b57f26ffeb8222c81 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:40 -0700 Subject: perf/x86/cstate: Use Intel Model name macros This should be getting old by now. Use the new macros intead of open-coded magic numbers. Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001940.FE69D646@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 47 +++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9ba4e4136a15..4c7638b91fa5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,6 +89,7 @@ #include #include #include +#include #include "../perf_event.h" MODULE_LICENSE("GPL"); @@ -511,37 +512,37 @@ static const struct cstate_model slm_cstates __initconst = { { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } static const struct x86_cpu_id intel_cstates_match[] __initconst = { - X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ - X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ - X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_NEHALEM_EX, nhm_cstates), - X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ - X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ - X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EP, nhm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_WESTMERE_EX, nhm_cstates), - X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ - X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SANDYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ - 
X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates), - X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ - X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ - X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates), - X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), - X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ - X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ - X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), - X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ - X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ - X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ - X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates), - X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ - X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -- cgit v1.2.3 From a07301ab3dabd1e31696c1bf1775aba24eb7573d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:42 -0700 Subject: perf/x86/uncore: Use Intel family name macros for uncore Another straightforward replacement of magic numbers Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001942.537570B6@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 65490589e52e..4e70d2721249 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,5 @@ #include +#include #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -1382,26 +1383,26 @@ static const struct intel_uncore_init_fun skl_uncore_init __initconst = { }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ - X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ - X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), - X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ - X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ - X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ - X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ - X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ - X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ - X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ - X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ - X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ - X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ - X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ - X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ - X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), {}, }; -- cgit v1.2.3 From 
348c5ac6c7dc117e1de095bf07c86c31101d56f3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 2 Jun 2016 17:19:53 -0700 Subject: perf/x86/rapl: Add Skylake server model detection SKX uses similar RAPL interface as Broadwell server. Signed-off-by: Jacob Pan Signed-off-by: Dave Hansen Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001953.38848836@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 8012fe6c7c8b..d0c58b35155f 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -807,6 +807,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), {}, }; -- cgit v1.2.3 From d5e0c89a8ccde900c3245474915ea0f518abdb79 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 2 Jun 2016 17:19:39 -0700 Subject: x86/platform: Use new Intel model number macros Remove the open-coded model numbers. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jacob Pan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Srinivas Pandruvada Cc: Thomas Gleixner Cc: jacob.jun.pan@intel.com Link: http://lkml.kernel.org/r/20160603001939.D1D7FC2F@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/atom/punit_atom_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 81c769e80614..109782996867 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -23,6 +23,7 @@ #include #include #include +#include #include /* Power gate status reg */ @@ -143,8 +144,8 @@ static void punit_dbgfs_unregister(void) (kernel_ulong_t)&drv_data } static const struct x86_cpu_id intel_punit_cpu_ids[] = { - ICPU(55, punit_device_byt), /* Valleyview, Bay Trail */ - ICPU(76, punit_device_cht), /* Braswell, Cherry Trail */ + ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), + ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; -- cgit v1.2.3 From d1898b733619bd46194bd25aa6452d238ff2dc4e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 1 Jun 2016 10:42:20 -0700 Subject: x86/fpu: Add tracepoints to dump FPU state at key points I've been carrying this patch around for a bit and it's helped me solve at least a couple FPU-related bugs. In addition to using it for debugging, I also drug it out because using AVX (and AVX2/AVX-512) can have serious power consequences for a modern core. It's very important to be able to figure out who is using it. It's also insanely useful to go out and see who is using a given feature, like MPX or Memory Protection Keys. If you, for instance, want to find all processes using protection keys, you can do: echo 'xfeatures & 0x200' > filter Since 0x200 is the protection keys feature bit. Note that this touches the KVM code. 
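For reference, the filter expression above corresponds to a one-line C predicate on the xfeatures word these tracepoints record; a trivial sketch, with a made-up helper name and the protection-keys bit (0x200, i.e. bit 9) written out explicitly:

/*
 * Illustrative only: "is this task's saved FPU state using protection keys?"
 * Mirrors the 'xfeatures & 0x200' ftrace filter from the changelog above.
 */
static inline bool fpu_state_uses_pkeys(const struct fpu *fpu)
{
	return fpu->state.xsave.header.xfeatures & (1ULL << 9);	/* 0x200 */
}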
KVM did a CREATE_TRACE_POINTS and then included a bunch of random headers. If anyone one of those included other tracepoints, it would have defined the *OTHER* tracepoints. That's bogus, so move it to the right place. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160601174220.3CDFB90E@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 5 ++ arch/x86/include/asm/trace/fpu.h | 119 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/fpu/core.c | 18 ++++++ arch/x86/kernel/fpu/signal.c | 3 + arch/x86/kvm/x86.c | 6 +- 5 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/trace/fpu.h (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 31ac8e6d9f36..116b58347501 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * High level FPU state handling functions: @@ -524,6 +525,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu) fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); + trace_x86_fpu_regs_deactivated(fpu); } /* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ @@ -533,6 +535,7 @@ static inline void __fpregs_activate(struct fpu *fpu) fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); + trace_x86_fpu_regs_activated(fpu); } /* @@ -604,11 +607,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) /* But leave fpu_fpregs_owner_ctx! */ old_fpu->fpregs_active = 0; + trace_x86_fpu_regs_deactivated(old_fpu); /* Don't change CR0.TS if we just switch! 
*/ if (fpu.preload) { new_fpu->counter++; __fpregs_activate(new_fpu); + trace_x86_fpu_regs_activated(new_fpu); prefetch(&new_fpu->state); } else { __fpregs_deactivate_hw(); diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h new file mode 100644 index 000000000000..9217ab1f5bf6 --- /dev/null +++ b/arch/x86/include/asm/trace/fpu.h @@ -0,0 +1,119 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM x86_fpu + +#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FPU_H + +#include + +DECLARE_EVENT_CLASS(x86_fpu, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu), + + TP_STRUCT__entry( + __field(struct fpu *, fpu) + __field(bool, fpregs_active) + __field(bool, fpstate_active) + __field(int, counter) + __field(u64, xfeatures) + __field(u64, xcomp_bv) + ), + + TP_fast_assign( + __entry->fpu = fpu; + __entry->fpregs_active = fpu->fpregs_active; + __entry->fpstate_active = fpu->fpstate_active; + __entry->counter = fpu->counter; + if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { + __entry->xfeatures = fpu->state.xsave.header.xfeatures; + __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; + } + ), + TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx", + __entry->fpu, + __entry->fpregs_active, + __entry->fpstate_active, + __entry->counter, + __entry->xfeatures, + __entry->xcomp_bv + ) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_save, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_before_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_after_restore, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_init_state, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_dropped, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_src, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed, + TP_PROTO(struct fpu *fpu), + TP_ARGS(fpu) +); + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fpu +#endif /* _TRACE_FPU_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97027545a72d..7d564742e499 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -12,6 +12,9 @@ #include +#define CREATE_TRACE_POINTS +#include + /* * Represents the initial FPU state. 
It's mostly (but not completely) zeroes, * depending on the FPU hardware format: @@ -192,6 +195,7 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); + trace_x86_fpu_before_save(fpu); if (fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(fpu)) { if (use_eager_fpu()) @@ -200,6 +204,7 @@ void fpu__save(struct fpu *fpu) fpregs_deactivate(fpu); } } + trace_x86_fpu_after_save(fpu); preempt_enable(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -275,6 +280,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } preempt_enable(); + trace_x86_fpu_copy_src(src_fpu); + trace_x86_fpu_copy_dst(dst_fpu); + return 0; } @@ -288,7 +296,9 @@ void fpu__activate_curr(struct fpu *fpu) if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; } @@ -314,7 +324,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu) } else { if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -347,7 +359,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu) fpu->last_cpu = -1; } else { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -432,9 +446,11 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); + trace_x86_fpu_before_restore(fpu); fpregs_activate(fpu); copy_kernel_to_fpregs(&fpu->state); fpu->counter++; + trace_x86_fpu_after_restore(fpu); kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); @@ -463,6 +479,8 @@ void fpu__drop(struct fpu *fpu) fpu->fpstate_active = 0; + trace_x86_fpu_dropped(fpu); + preempt_enable(); } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 31c6a60505e6..c6f2a3cee2c2 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -10,6 +10,7 @@ #include #include +#include static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; @@ -282,6 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ state_size = sizeof(struct fxregs_state); fx_only = 1; + trace_x86_fpu_xstate_check_failed(fpu); } else { state_size = fx_sw_user.xstate_size; xfeatures = fx_sw_user.xfeatures; @@ -311,6 +313,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..1ba3b7d3cae9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -55,9 +55,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include "trace.h" - #include #include #include @@ -68,6 +65,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) -- cgit v1.2.3 From f5967101e9de12addcda4510dfbac66d7c5779c3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2016 12:56:27 +0200 Subject: x86/hweight: Get rid of the special calling convention People complained about ARCH_HWEIGHT_CFLAGS and how 
it throws a wrench into kcov, lto, etc, experimentations. Add asm versions for __sw_hweight{32,64}() and do explicit saving and restoring of clobbered registers. This gets rid of the special calling convention. We get to call those functions on !X86_FEATURE_POPCNT CPUs. We still need to hardcode POPCNT and register operands as some old gas versions which we support, do not know about POPCNT. Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives can do padding now. Suggested-by: H. Peter Anvin Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464605787-20603-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 --- arch/x86/include/asm/arch_hweight.h | 24 +++++------- arch/x86/kernel/i386_ksyms_32.c | 2 + arch/x86/kernel/x8664_ksyms_64.c | 3 ++ arch/x86/lib/Makefile | 2 +- arch/x86/lib/hweight.S | 77 +++++++++++++++++++++++++++++++++++++ lib/Makefile | 5 --- lib/hweight.c | 4 ++ 8 files changed, 97 insertions(+), 25 deletions(-) create mode 100644 arch/x86/lib/hweight.S (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..729d41d9ced3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -294,11 +294,6 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR -config ARCH_HWEIGHT_CFLAGS - string - default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 - default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 02e799fa43d1..e7cd63175de4 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -4,8 +4,8 @@ #include #ifdef CONFIG_64BIT -/* popcnt %edi, %eax -- redundant REX prefix for alignment */ -#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %edi, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" @@ -17,19 +17,15 @@ #define REG_OUT "a" #endif -/* - * __sw_hweightXX are called from within the alternatives below - * and callee-clobbered registers need to be taken care of. See - * ARCH_HWEIGHT_CFLAGS in for the respective - * compiler switches. 
- */ +#define __HAVE_ARCH_SW_HWEIGHT + static __always_inline unsigned int __arch_hweight32(unsigned int w) { - unsigned int res = 0; + unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } @@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w) #else static __always_inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; + unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa485ae..d40ee8a38fed 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd05942bc918..f1aebfb49c36 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(__sw_hweight32); +EXPORT_SYMBOL(__sw_hweight64); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a576752a7e..ec969cc3eb20 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S new file mode 100644 index 000000000000..02de3d74d2c5 --- /dev/null +++ b/arch/x86/lib/hweight.S @@ -0,0 +1,77 @@ +#include + +#include + +/* + * unsigned int __sw_hweight32(unsigned int w) + * %rdi: w + */ +ENTRY(__sw_hweight32) + +#ifdef CONFIG_X86_64 + movl %edi, %eax # w +#endif + __ASM_SIZE(push,) %__ASM_REG(dx) + movl %eax, %edx # w -> t + shrl %edx # t >>= 1 + andl $0x55555555, %edx # t &= 0x55555555 + subl %edx, %eax # w -= t + + movl %eax, %edx # w -> t + shrl $2, %eax # w_tmp >>= 2 + andl $0x33333333, %edx # t &= 0x33333333 + andl $0x33333333, %eax # w_tmp &= 0x33333333 + addl %edx, %eax # w = w_tmp + t + + movl %eax, %edx # w -> t + shrl $4, %edx # t >>= 4 + addl %edx, %eax # w_tmp += t + andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret +ENDPROC(__sw_hweight32) + +ENTRY(__sw_hweight64) +#ifdef CONFIG_X86_64 + pushq %rdx + + movq %rdi, %rdx # w -> t + movabsq $0x5555555555555555, %rax + shrq %rdx # t >>= 1 + andq %rdx, %rax # t &= 0x5555555555555555 + movabsq $0x3333333333333333, %rdx + subq %rax, %rdi # w -= t + + movq %rdi, %rax # w -> t + shrq $2, %rdi # w_tmp >>= 2 + andq %rdx, %rax # t &= 0x3333333333333333 + andq %rdi, %rdx # w_tmp &= 0x3333333333333333 + addq %rdx, %rax # w = w_tmp + t + + movq %rax, %rdx # w -> t + shrq $4, %rdx # t >>= 4 + addq %rdx, %rax # w_tmp += t + movabsq $0x0f0f0f0f0f0f0f0f, %rdx + andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f + movabsq $0x0101010101010101, 
%rdx + imulq %rdx, %rax # w_tmp *= 0x0101010101010101 + shrq $56, %rax # w = w_tmp >> 56 + + popq %rdx + ret +#else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx + + call __sw_hweight32 + movl %eax, %ecx # stash away result + movl %edx, %eax # second part of input + call __sw_hweight32 + addl %ecx, %eax # result + + popl %ecx + ret +#endif +ENDPROC(__sw_hweight64) diff --git a/lib/Makefile b/lib/Makefile index ff6a7a6c6395..07d06a8b9788 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n -# Kernel does not boot if we instrument this file as it uses custom calling -# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS). -KCOV_INSTRUMENT_hweight.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ @@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -GCOV_PROFILE_hweight.o := n -CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/hweight.c b/lib/hweight.c index 9a5c1f221558..43273a7d83cf 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,6 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. */ +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned int __sw_hweight32(unsigned int w) { #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER @@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w) #endif } EXPORT_SYMBOL(__sw_hweight32); +#endif unsigned int __sw_hweight16(unsigned int w) { @@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w) } EXPORT_SYMBOL(__sw_hweight8); +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 @@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w) #endif } EXPORT_SYMBOL(__sw_hweight64); +#endif -- cgit v1.2.3 From 8ee62b1870be8e630158701632a533d0378e15b8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 3 Jun 2016 22:26:02 -0700 Subject: locking/rwsem: Convert sem->count to 'atomic_long_t' Convert the rwsem count variable to an atomic_long_t since we use it as an atomic variable. This also allows us to remove the rwsem_atomic_{add,update}() "abstraction" which would now be an unnecesary level of indirection. In follow up patches, we also remove the rwsem_atomic_{add,update}() definitions across the various architectures. Suggested-by: Peter Zijlstra Signed-off-by: Jason Low [ Build warning fixes on various architectures. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Fenghua Yu Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Paul E. 
McKenney Cc: Peter Hurley Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Waiman Long Link: http://lkml.kernel.org/r/1465017963-4839-2-git-send-email-jason.low2@hpe.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/rwsem.h | 26 +++++++++++++------------- arch/ia64/include/asm/rwsem.h | 24 ++++++++++++------------ include/asm-generic/rwsem.h | 6 +++--- include/linux/rwsem.h | 8 +++++--- kernel/locking/rwsem-xadd.c | 32 +++++++++++++++++--------------- 5 files changed, 50 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..b40021aabb9f 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..c5d544f188ed 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + 
RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,9 +143,9 @@ __downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 3fc94a046bf5..a3a93eca766c 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -41,8 +41,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acquire(&sem->count, tmp, + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } @@ -79,7 +79,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; - tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index d37fbb34d06f..dd1d14250340 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -23,10 +23,11 @@ struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK #include /* use a generic implementation */ +#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE #else /* All arch specific implementations share the same struct */ struct rw_semaphore { - long count; + atomic_long_t count; struct list_head wait_list; raw_spinlock_t wait_lock; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -54,9 +55,10 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return sem->count != 0; + return atomic_long_read(&sem->count) != 0; } 
+#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) #endif /* Common initializer macros and functions */ @@ -74,7 +76,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #define __RWSEM_INITIALIZER(name) \ - { .count = RWSEM_UNLOCKED_VALUE, \ + { __RWSEM_INIT_COUNT(name), \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index b957da7fcb19..63b40a5c62ec 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -80,7 +80,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->count = RWSEM_UNLOCKED_VALUE; + atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -153,10 +153,11 @@ __rwsem_mark_wake(struct rw_semaphore *sem, if (wake_type != RWSEM_WAKE_READ_OWNED) { adjustment = RWSEM_ACTIVE_READ_BIAS; try_reader_grant: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { /* A writer stole the lock. Undo our reader grant. */ - if (rwsem_atomic_update(-adjustment, sem) & + if (atomic_long_sub_return(adjustment, &sem->count) & RWSEM_ACTIVE_MASK) goto out; /* Last active locker left. Retry waking readers. */ @@ -186,7 +187,7 @@ __rwsem_mark_wake(struct rw_semaphore *sem, adjustment -= RWSEM_WAITING_BIAS; if (adjustment) - rwsem_atomic_add(adjustment, sem); + atomic_long_add(adjustment, &sem->count); next = sem->wait_list.next; loop = woken; @@ -233,7 +234,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); + count = atomic_long_add_return(adjustment, &sem->count); /* If there are no active locks, wake the front queued process(es). 
* @@ -282,7 +283,8 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) RWSEM_ACTIVE_WRITE_BIAS : RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS; - if (cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) { + if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) + == RWSEM_WAITING_BIAS) { rwsem_set_owner(sem); return true; } @@ -296,13 +298,13 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = READ_ONCE(sem->count); + long old, count = atomic_long_read(&sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; - old = cmpxchg_acquire(&sem->count, count, + old = atomic_long_cmpxchg_acquire(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); if (old == count) { rwsem_set_owner(sem); @@ -324,7 +326,7 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_lock(); owner = READ_ONCE(sem->owner); if (!owner) { - long count = READ_ONCE(sem->count); + long count = atomic_long_read(&sem->count); /* * If sem->owner is not set, yet we have just recently entered the * slowpath with the lock being active, then there is a possibility @@ -375,7 +377,7 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) * held by readers. Check the counter to verify the * state. */ - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); return (count == 0 || count == RWSEM_WAITING_BIAS); } @@ -460,7 +462,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ - count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); + count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); /* do optimistic spinning and steal lock if possible */ if (rwsem_optimistic_spin(sem)) @@ -483,7 +485,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); /* * If there were already threads queued before us and there are @@ -505,7 +507,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) } } else - count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count); /* wait until we successfully acquire the lock */ set_current_state(state); @@ -521,7 +523,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) schedule(); set_current_state(state); - } while ((count = sem->count) & RWSEM_ACTIVE_MASK); + } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } @@ -536,7 +538,7 @@ out_nolock: raw_spin_lock_irq(&sem->wait_lock); list_del(&waiter.list); if (list_empty(&sem->wait_list)) - rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem); + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); else __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); -- cgit v1.2.3 From d157bd860f1c828593730dca594d0ce51956833b Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 16 May 2016 17:38:02 -0700 Subject: locking/rwsem: Remove rwsem_atomic_add() and rwsem_atomic_update() The rwsem-xadd count has been converted to an atomic variable and the rwsem code now directly uses atomic_long_add() and 
atomic_long_add_return(), so we can remove the arch implementations of rwsem_atomic_add() and rwsem_atomic_update(). Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Arnd Bergmann Cc: Christoph Lameter Cc: Davidlohr Bueso Cc: Fenghua Yu Cc: Heiko Carstens Cc: Ivan Kokshaysky Cc: Jason Low Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Hurley Cc: Peter Zijlstra Cc: Richard Henderson Cc: Terry Rudd Cc: Thomas Gleixner Cc: Tim Chen Cc: Tony Luck Cc: Waiman Long Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/rwsem.h | 42 ------------------------------------------ arch/ia64/include/asm/rwsem.h | 7 ------- arch/s390/include/asm/rwsem.h | 37 ------------------------------------- arch/x86/include/asm/rwsem.h | 18 ------------------ include/asm-generic/rwsem.h | 16 ---------------- 5 files changed, 120 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index b40021aabb9f..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index c5d544f188ed..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -151,11 +151,4 @@ __downgrade_write (struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. 
- */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..089ced4edbbc 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index a3a93eca766c..5be122e3d326 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -106,14 +106,6 @@ static inline void __up_write(struct rw_semaphore *sem) rwsem_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - atomic_long_add(delta, (atomic_long_t *)&sem->count); -} - /* * downgrade write lock to read lock */ @@ -134,13 +126,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); -} - #endif /* __KERNEL__ */ #endif /* _ASM_GENERIC_RWSEM_H */ -- cgit v1.2.3 From 6428671bae97caa7040e24e79e969fd87908f4f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Jun 2016 20:58:15 +0200 Subject: locking/mutex: Optimize mutex_trylock() fast-path A while back Viro posted a number of 'interesting' mutex_is_locked() users on IRC, one of those was RCU. RCU seems to use mutex_is_locked() to avoid doing mutex_trylock(), the regular load before modify pattern. While the use isn't wrong per se, its curious in that its needed at all, mutex_trylock() should be good enough on its own to avoid the pointless cacheline bounces. 
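What such a cheap trylock fast path amounts to is the classic check-then-cmpxchg (test-and-test-and-set) pattern; a minimal sketch, using a hypothetical example_mutex_trylock() over generic kernel atomics purely for illustration (1 == unlocked, as in the mutex fast path):

/*
 * Illustrative only: read the counter first so the cacheline is only
 * dirtied (cmpxchg'd) when there is a realistic chance of taking the lock.
 */
static inline bool example_mutex_trylock(atomic_t *count)
{
	if (atomic_read(count) != 1)		/* cheap shared read, no bouncing */
		return false;
	return atomic_cmpxchg(count, 1, 0) == 1;	/* 1 -> 0: we own it */
}

The per-architecture hunks below add essentially this early atomic_read() check in front of the existing cmpxchg/xchg fast paths.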
So fix those and remove the mutex_is_locked() (ab)use from RCU. Reported-by: Al Viro Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paul McKenney Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/20160601185815.GW3190@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/mutex.h | 2 +- arch/powerpc/include/asm/mutex.h | 2 +- arch/x86/include/asm/mutex_32.h | 2 +- arch/x86/include/asm/mutex_64.h | 6 +++--- include/asm-generic/mutex-dec.h | 2 +- include/asm-generic/mutex-xchg.h | 6 +++++- kernel/rcu/tree.c | 1 - 7 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. 
*/ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h index fd694cfd678a..c54829d3de37 100644 --- a/include/asm-generic/mutex-dec.h +++ b/include/asm-generic/mutex-dec.h @@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) return 1; return 0; } diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h index a6b4a7bd6ac9..3269ec4e195f 100644 --- a/include/asm-generic/mutex-xchg.h +++ b/include/asm-generic/mutex-xchg.h @@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - int prev = atomic_xchg_acquire(count, 0); + int prev; + if (atomic_read(count) != 1) + return 0; + + prev = atomic_xchg_acquire(count, 0); if (unlikely(prev < 0)) { /* * The lock was marked contended so we must restore that diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c7f1bc4f817c..b7326893221f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && (rnp == rnp_root || ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && - !mutex_is_locked(&rsp->exp_mutex) && mutex_trylock(&rsp->exp_mutex)) goto fastpath; -- cgit v1.2.3 From 2823d4da5d8a0c222747b24eceb65f5b30717d02 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:37 -0700 Subject: x86, bitops: remove use of "sbb" to return CF Use SETC instead of SBB to return the value of CF from assembly. Using SETcc enables uniformity with other flags-returning pieces of assembly code. Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-2-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 24 ++++++++++++------------ arch/x86/include/asm/percpu.h | 12 ++++++------ arch/x86/include/asm/signal.h | 6 +++--- arch/x86/include/asm/sync_bitops.h | 18 +++++++++--------- arch/x86/kernel/vm86_32.c | 5 +---- 5 files changed, 31 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 7766d1cf096e..b2b797d1f49a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -230,11 +230,11 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) */ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm("bts %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -270,11 +270,11 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a */ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -282,11 +282,11 @@ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long /* WARNING: non atomic and it can be reordered! */ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + "setc %0" + : "=qm" (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -313,11 +313,11 @@ static __always_inline int constant_test_bit(long nr, const volatile unsigned lo static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) { - int oldbit; + unsigned char oldbit; asm volatile("bt %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + "setc %0" + : "=qm" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66ca68c6..65039e9571db 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,9 +510,9 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - int old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (var) \ + unsigned char old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ + : "=qm" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) @@ -532,11 +532,11 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, static inline int x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - int oldbit; + unsigned char oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + "setc %0" + : "=qm" (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 2138c9ae19ee..dd1e7d6387ab 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - int ret; - asm("btl 
%2,%1\n\tsbbl %0,%0" - : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + unsigned char ret; + asm("btl %2,%1\n\tsetc %0" + : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); return ret; } diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index f28a24b51dc7..cbf8847d02a0 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -79,10 +79,10 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; bts %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btr %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btc %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 3dce1ca0a653..01f30e56f99e 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs) static inline int is_revectored(int nr, struct revectored_struct *bitmap) { - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap), "r" (nr)); - return nr; + return test_bit(nr, bitmap->__map); } #define val_byte(val, n) (((__u8 *)&val)[n]) -- cgit v1.2.3 From 117780eef7740729e803bdcc0d5f2f48137ea8e3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:38 -0700 Subject: x86, asm: use bool for bitops and other assembly outputs The gcc people have confirmed that using "bool" when combined with inline assembly always is treated as a byte-sized operand that can be assumed to be 0 or 1, which is exactly what the SET instruction emits. Change the output types and intermediate variables of as many operations as practical to "bool". Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-3-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/boot/bitops.h | 8 +++++--- arch/x86/boot/boot.h | 8 ++++---- arch/x86/boot/string.c | 2 +- arch/x86/include/asm/apm.h | 6 +++--- arch/x86/include/asm/archrandom.h | 16 ++++++++-------- arch/x86/include/asm/atomic.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 10 +++++----- arch/x86/include/asm/bitops.h | 28 ++++++++++++++-------------- arch/x86/include/asm/local.h | 8 ++++---- arch/x86/include/asm/percpu.h | 8 ++++---- arch/x86/include/asm/rmwcc.h | 4 ++-- arch/x86/include/asm/rwsem.h | 17 +++++++++-------- include/linux/random.h | 12 ++++++------ 13 files changed, 69 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9940d9..0d41d68131cc 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -16,14 +16,16 @@ #define BOOT_BITOPS_H #define _LINUX_BITOPS_H /* Inhibit inclusion of */ -static inline int constant_test_bit(int nr, const void *addr) +#include + +static inline bool constant_test_bit(int nr, const void *addr) { const u32 *p = (const u32 *)addr; return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; } -static inline int variable_test_bit(int nr, const void *addr) +static inline bool variable_test_bit(int nr, const void *addr) { - u8 v; + bool v; const u32 *p = (const u32 *)addr; asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88353de..2edb2d53c3a2 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -176,16 +176,16 @@ static inline void wrgs32(u32 v, addr_t addr) } /* Note: these only return true/false, not a signed return value! */ -static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { - u8 diff; + bool diff; asm volatile("fs; repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { - u8 diff; + bool diff; asm volatile("gs; repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b8465d302..cc3bd583dce1 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -17,7 +17,7 @@ int memcmp(const void *s1, const void *s2, size_t len) { - u8 diff; + bool diff; asm("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6db74b..93eebc636c76 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, : "memory", "cc"); } -static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, - u32 ecx_in, u32 *eax) +static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) { int cx, dx, si; - u8 error; + bool error; /* * N.B. 
We do NOT need a cld after the BIOS call diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 69f1366f1aa3..ab6f599ce2fd 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -43,7 +43,7 @@ #ifdef CONFIG_ARCH_RANDOM /* Instead of arch_get_random_long() when alternatives haven't run. */ -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { int ok; asm volatile("1: " RDRAND_LONG "\n\t" @@ -53,13 +53,13 @@ static inline int rdrand_long(unsigned long *v) "2:" : "=r" (ok), "=a" (*v) : "0" (RDRAND_RETRY_LOOPS)); - return ok; + return !!ok; } /* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { - unsigned char ok; + bool ok; asm volatile(RDSEED_LONG "\n\t" "setc %0" : "=qm" (ok), "=a" (*v)); @@ -67,7 +67,7 @@ static inline bool rdseed_long(unsigned long *v) } #define GET_RANDOM(name, type, rdrand, nop) \ -static inline int name(type *v) \ +static inline bool name(type *v) \ { \ int ok; \ alternative_io("movl $0, %0\n\t" \ @@ -80,13 +80,13 @@ static inline int name(type *v) \ X86_FEATURE_RDRAND, \ ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ "0" (RDRAND_RETRY_LOOPS)); \ - return ok; \ + return !!ok; \ } #define GET_SEED(name, type, rdseed, nop) \ -static inline int name(type *v) \ +static inline bool name(type *v) \ { \ - unsigned char ok; \ + bool ok; \ alternative_io("movb $0, %0\n\t" \ nop, \ rdseed "\n\t" \ @@ -119,7 +119,7 @@ GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); #else -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { return 0; } diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..17d881248e6c 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -75,7 +75,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); } @@ -112,7 +112,7 @@ static __always_inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static __always_inline int atomic_dec_and_test(atomic_t *v) +static __always_inline bool atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); } @@ -125,7 +125,7 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_inc_and_test(atomic_t *v) +static __always_inline bool atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); } @@ -139,7 +139,7 @@ static __always_inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static __always_inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline bool atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); } diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..4f881d7f0c39 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -70,7 +70,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); } @@ -109,7 +109,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline bool atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); } @@ -122,7 +122,7 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline bool atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); } @@ -136,7 +136,7 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long i, atomic64_t *v) +static inline bool atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); } @@ -180,7 +180,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b2b797d1f49a..8cbb7f495546 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); } @@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add * * This is the same as test_and_set_bit on x86. */ -static __always_inline int +static __always_inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); @@ -228,9 +228,9 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. 
*/ -static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm("bts %2,%1\n\t" "setc %0" @@ -247,7 +247,7 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); } @@ -268,9 +268,9 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("btr %2,%1\n\t" "setc %0" @@ -280,9 +280,9 @@ static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long } /* WARNING: non atomic and it can be reordered! */ -static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("btc %2,%1\n\t" "setc %0" @@ -300,20 +300,20 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ -static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); } -static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("bt %2,%1\n\t" "setc %0" @@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo * @nr: bit number to test * @addr: Address to start counting from */ -static int test_bit(int nr, const volatile unsigned long *addr); +static bool test_bit(int nr, const volatile unsigned long *addr); #endif #define test_bit(nr, addr) \ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 4ad6560847b1..0cdc65b0d14d 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -50,7 +50,7 @@ static inline void local_sub(long i, local_t *l) * true if the result is zero, or false for all * other cases. */ -static inline int local_sub_and_test(long i, local_t *l) +static inline bool local_sub_and_test(long i, local_t *l) { GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); } @@ -63,7 +63,7 @@ static inline int local_sub_and_test(long i, local_t *l) * returns true if the result is 0, or false for all other * cases. 
*/ -static inline int local_dec_and_test(local_t *l) +static inline bool local_dec_and_test(local_t *l) { GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); } @@ -76,7 +76,7 @@ static inline int local_dec_and_test(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -static inline int local_inc_and_test(local_t *l) +static inline bool local_inc_and_test(local_t *l) { GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); } @@ -90,7 +90,7 @@ static inline int local_inc_and_test(local_t *l) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int local_add_negative(long i, local_t *l) +static inline bool local_add_negative(long i, local_t *l) { GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); } diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 65039e9571db..184d7f3ecb9f 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,14 +510,14 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - unsigned char old__; \ + bool old__; \ asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ : "=qm" (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) -static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, +static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; @@ -529,10 +529,10 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, #endif } -static inline int x86_this_cpu_variable_test_bit(int nr, +static inline bool x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - unsigned char oldbit; + bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" "setc %0" diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8f7866a5b9a4..a15b73d90be3 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -23,11 +23,11 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - char c; \ + bool c; \ asm volatile (fullop "; set" cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ - return c != 0; \ + return c; \ } while (0) #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..c5087706c02e 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" @@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) : "+m" (sem->count), "=&a" (result), "=&r" (tmp) : "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); - return result >= 0 ? 
1 : 0; + return result >= 0; } /* @@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline bool __down_write_trylock(struct rw_semaphore *sem) { - long result, tmp; + bool result; + long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* was the active mask 0 before? */ " jnz 2f\n\t" " mov %1,%2\n\t" - " add %3,%2\n\t" + " add %4,%2\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %b1\n\t" - " movzbl %b1, %k1\n\t" + " sete %3\n\t" "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), + "=qm" (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; diff --git a/include/linux/random.h b/include/linux/random.h index e47e533742b5..3d6e9815cd85 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -95,27 +95,27 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) #ifdef CONFIG_ARCH_RANDOM # include #else -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { return 0; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { return 0; } -static inline int arch_has_random(void) +static inline bool arch_has_random(void) { return 0; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { return 0; } -static inline int arch_get_random_seed_int(unsigned int *v) +static inline bool arch_get_random_seed_int(unsigned int *v) { return 0; } -static inline int arch_has_random_seed(void) +static inline bool arch_has_random_seed(void) { return 0; } -- cgit v1.2.3 From 18fe58229d80c7f4f138a07e84ba608e1ebd232b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:39 -0700 Subject: x86, asm: change the GEN_*_RMWcc() macros to not quote the condition Change the lexical defintion of the GEN_*_RMWcc() macros to not take the condition code as a quoted string. This will help support changing them to use the new __GCC_ASM_FLAG_OUTPUTS__ feature in a subsequent patch. Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-4-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/atomic.h | 8 ++++---- arch/x86/include/asm/atomic64_64.h | 8 ++++---- arch/x86/include/asm/bitops.h | 6 +++--- arch/x86/include/asm/local.h | 8 ++++---- arch/x86/include/asm/preempt.h | 2 +- arch/x86/include/asm/rmwcc.h | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 17d881248e6c..7322c1566f63 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -77,7 +77,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) */ static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** @@ -114,7 +114,7 @@ static __always_inline void atomic_dec(atomic_t *v) */ static __always_inline bool atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** @@ -127,7 +127,7 @@ static __always_inline bool atomic_dec_and_test(atomic_t *v) */ static __always_inline bool atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** @@ -141,7 +141,7 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v) */ static __always_inline bool atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 4f881d7f0c39..57bf925710d9 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -72,7 +72,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) */ static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** @@ -111,7 +111,7 @@ static __always_inline void atomic64_dec(atomic64_t *v) */ static inline bool atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** @@ -124,7 +124,7 @@ static inline bool atomic64_dec_and_test(atomic64_t *v) */ static inline bool atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** @@ -138,7 +138,7 @@ static inline bool atomic64_inc_and_test(atomic64_t *v) */ static inline bool atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 8cbb7f495546..ed8f4851262f 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -203,7 +203,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - 
GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); } /** @@ -249,7 +249,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); } /** @@ -302,7 +302,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 0cdc65b0d14d..7511978093eb 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -52,7 +52,7 @@ static inline void local_sub(long i, local_t *l) */ static inline bool local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); } /** @@ -65,7 +65,7 @@ static inline bool local_sub_and_test(long i, local_t *l) */ static inline bool local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); } /** @@ -78,7 +78,7 @@ static inline bool local_dec_and_test(local_t *l) */ static inline bool local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); } /** @@ -92,7 +92,7 @@ static inline bool local_inc_and_test(local_t *l) */ static inline bool local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index d397deb58146..17f218645701 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); + GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); } /* diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index a15b73d90be3..e3264c414c4a 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -5,7 +5,7 @@ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ @@ -24,7 +24,7 @@ cc_label: \ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ bool c; \ - asm volatile (fullop "; set" cc " %1" \ + asm volatile (fullop "; set" #cc " %1" \ : "+m" (var), "=qm" (c) \ : __VA_ARGS__ : "memory"); \ return c; \ -- cgit v1.2.3 From ff3554b409b82d349f71e9d7082648b7b0a1a5bb Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 8 Jun 2016 12:38:40 -0700 Subject: x86, asm: define CC_SET() and CC_OUT() macros The CC_SET() and CC_OUT() macros can be used together to take advantage of the new __GCC_ASM_FLAG_OUTPUTS__ feature in gcc 6+ while remaining backwards compatible. CC_SET() generates a SET instruction on older compilers; CC_OUT() makes sure the output is received in the correct variable. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-5-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/asm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f5063b6659eb..7acb51c49fec 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -42,6 +42,18 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ -- cgit v1.2.3 From ba741e356c49bfce0adcfa851080666870867f6b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:41 -0700 Subject: x86, asm: change GEN_*_RMWcc() to use CC_SET()/CC_OUT() Change the GEN_*_RMWcc() macros to use the CC_SET()/CC_OUT() macros defined in , and disable the use of asm goto if __GCC_ASM_FLAG_OUTPUTS__ is enabled. This allows gcc to receive the flags output directly in gcc 6+. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-6-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/rmwcc.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index e3264c414c4a..661dd305694a 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -1,7 +1,9 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#ifdef CC_HAVE_ASM_GOTO +#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) + +/* Use asm goto */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ @@ -19,13 +21,15 @@ cc_label: \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) -#else /* !CC_HAVE_ASM_GOTO */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ + +/* Use flags output or a set instruction */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ bool c; \ - asm volatile (fullop "; set" #cc " %1" \ - : "+m" (var), "=qm" (c) \ + asm volatile (fullop ";" CC_SET(cc) \ + : "+m" (var), CC_OUT(cc) (c) \ : __VA_ARGS__ : "memory"); \ return c; \ } while (0) @@ -36,6 +40,6 @@ do { \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) -#endif /* CC_HAVE_ASM_GOTO */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #endif /* _ASM_X86_RMWcc */ -- cgit v1.2.3 From 86b61240d4c233b440cd29daf0baa440daf4a148 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Wed, 8 Jun 2016 12:38:42 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-7-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index ed8f4851262f..68557f52b961 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -233,8 +233,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * bool oldbit; asm("bts %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -273,8 +273,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long bool oldbit; asm volatile("btr %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -285,8 +285,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon bool oldbit; asm volatile("btc %2,%1\n\t" - "setc %0" - : "=qm" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -316,8 +316,8 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l bool oldbit; asm volatile("bt %2,%1\n\t" - "setc %0" - : "=qm" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; -- cgit v1.2.3 From 64be6d36f5674f3424d1901772f76e21874f4954 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:43 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-8-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/percpu.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 184d7f3ecb9f..e02e3f80d363 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -511,8 +511,9 @@ do { \ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ bool old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsetc %0" \ - : "=qm" (old__), "+m" (var) \ + asm volatile("btr %2,"__percpu_arg(1)"\n\t" \ + CC_SET(c) \ + : CC_OUT(c) (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) @@ -535,8 +536,8 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "setc %0" - : "=qm" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; -- cgit v1.2.3 From 35ccfb7114e2f0f454f264c049b03c31f4c6bbc0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:44 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() in Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in . Signed-off-by: H. 
Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-9-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/rwsem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index c5087706c02e..1e8be263065e 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -149,10 +149,10 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem) LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %3\n\t" + CC_SET(e) "# ending __down_write_trylock\n\t" : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), - "=qm" (result) + CC_OUT(e) (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; -- cgit v1.2.3 From 66928b4eb92dfb6d87c204238057b9278b36452b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:45 -0700 Subject: x86, asm, boot: Use CC_SET()/CC_OUT() in arch/x86/boot/boot.h Remove open-coded uses of set instructions to use CC_SET()/CC_OUT() in arch/x86/boot/boot.h. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-10-git-send-email-hpa@linux.intel.com Reviewed-by: Andy Lutomirski Reviewed-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) --- arch/x86/boot/boot.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 2edb2d53c3a2..7c1495f2b799 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "bitops.h" #include "ctype.h" #include "cpuflags.h" @@ -179,15 +180,15 @@ static inline void wrgs32(u32 v, addr_t addr) static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("fs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("fs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { bool diff; - asm volatile("gs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + asm volatile("gs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -- cgit v1.2.3 From 3b290398638ee4e57f1fb2e35c02005cba9a737f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 8 Jun 2016 12:38:46 -0700 Subject: x86, asm: Use CC_SET()/CC_OUT() and static_cpu_has() in archrandom.h Use CC_SET()/CC_OUT() and static_cpu_has(). This produces code good enough to eliminate ad hoc use of alternatives in , greatly simplifying the code. While we are at it, make x86_init_rdrand() compile out completely if we don't need it. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1465414726-197858-11-git-send-email-hpa@linux.intel.com v2: fix a conflict between and discovered by Ingo Molnar. There are a few places in x86-specific code where we need all of even when CONFIG_ARCH_RANDOM is disabled, so does not suffice. 
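For reference, the conversion pattern applied throughout this series can be summarized by the following minimal sketch. It is illustrative only and not taken from any of the patches; demo_test_and_set_bit() is a made-up name mirroring __test_and_set_bit(), and it assumes the CC_SET()/CC_OUT() definitions from <asm/asm.h> introduced earlier in the series.

/*
 * Minimal sketch (not part of any patch above): a flag-returning
 * helper written against CC_SET()/CC_OUT().  On gcc 6+ the carry
 * flag is handed to the compiler directly via the "=@ccc" flags
 * output; on older compilers the macros fall back to an explicit
 * SETC into a "bool"-typed operand.
 */
static __always_inline bool demo_test_and_set_bit(long nr,
						  volatile unsigned long *addr)
{
	bool oldbit;

	asm("bts %2,%1\n\t"
	    CC_SET(c)
	    : CC_OUT(c) (oldbit), "+m" (*addr)
	    : "Ir" (nr));
	return oldbit;
}

The same shape recurs in the bitops, percpu, rwsem, boot.h and archrandom conversions in this series.
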
--- arch/x86/include/asm/archrandom.h | 128 ++++++++++++++++++-------------------- arch/x86/kernel/cpu/rdrand.c | 4 +- 2 files changed, 62 insertions(+), 70 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index ab6f599ce2fd..5b0579abb398 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -25,8 +25,6 @@ #include #include -#include -#include #define RDRAND_RETRY_LOOPS 10 @@ -40,97 +38,91 @@ # define RDSEED_LONG RDSEED_INT #endif -#ifdef CONFIG_ARCH_RANDOM +/* Unconditional execution of RDRAND and RDSEED */ -/* Instead of arch_get_random_long() when alternatives haven't run. */ static inline bool rdrand_long(unsigned long *v) { - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return !!ok; + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_LONG "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; +} + +static inline bool rdrand_int(unsigned int *v) +{ + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; } -/* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { bool ok; asm volatile(RDSEED_LONG "\n\t" - "setc %0" - : "=qm" (ok), "=a" (*v)); + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); return ok; } -#define GET_RANDOM(name, type, rdrand, nop) \ -static inline bool name(type *v) \ -{ \ - int ok; \ - alternative_io("movl $0, %0\n\t" \ - nop, \ - "\n1: " rdrand "\n\t" \ - "jc 2f\n\t" \ - "decl %0\n\t" \ - "jnz 1b\n\t" \ - "2:", \ - X86_FEATURE_RDRAND, \ - ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ - "0" (RDRAND_RETRY_LOOPS)); \ - return !!ok; \ -} - -#define GET_SEED(name, type, rdseed, nop) \ -static inline bool name(type *v) \ -{ \ - bool ok; \ - alternative_io("movb $0, %0\n\t" \ - nop, \ - rdseed "\n\t" \ - "setc %0", \ - X86_FEATURE_RDSEED, \ - ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ - return ok; \ +static inline bool rdseed_int(unsigned int *v) +{ + bool ok; + asm volatile(RDSEED_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + return ok; } -#ifdef CONFIG_X86_64 - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#else - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#endif /* CONFIG_X86_64 */ - +/* Conditional execution based on CPU type */ #define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) -#else +/* + * These are the generic interfaces; they must not be declared if the + * stubs in are to be invoked, + * i.e. CONFIG_ARCH_RANDOM is not defined. + */ +#ifdef CONFIG_ARCH_RANDOM -static inline bool rdrand_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return arch_has_random() ? 
rdrand_long(v) : false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return arch_has_random() ? rdrand_int(v) : false; } -#endif /* CONFIG_ARCH_RANDOM */ +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_has_random_seed() ? rdseed_long(v) : false; +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_has_random_seed() ? rdseed_int(v) : false; +} extern void x86_init_rdrand(struct cpuinfo_x86 *c); +#else /* !CONFIG_ARCH_RANDOM */ + +static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } + +#endif /* !CONFIG_ARCH_RANDOM */ + #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index f6f50c4ceaec..cfa97ff67bda 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup); */ #define SANITY_CHECK_LOOPS 8 +#ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { -#ifdef CONFIG_ARCH_RANDOM unsigned long tmp; int i; @@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } -#endif } +#endif -- cgit v1.2.3 From 0145071b33142cbccffb51486b1ce921677b1d2d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 2 Jun 2016 14:20:18 +0200 Subject: x86: Do away with ARCH_[WANT_OPTIONAL|REQUIRE]_GPIOLIB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This replaces: - "select ARCH_REQUIRE_GPIOLIB" with "select GPIOLIB" as this can now be selected directly. - "select ARCH_WANT_OPTIONAL_GPIOLIB" with no dependency: GPIOLIB is now selectable by everyone, so we need not declare our intent to select it. When ordering the symbols the following rationale was used: if the selects were in alphabetical order, I moved select GPIOLIB to be in alphabetical order, but if the selects were not maintained in alphabetical order, I just replaced "select ARCH_REQUIRE_GPIOLIB" with "select GPIOLIB". Signed-off-by: Linus Walleij Cc: Michael Büsch Link: http://lkml.kernel.org/r/1464870018-8281-1-git-send-email-linus.walleij@linaro.org Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..607382b95372 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -49,7 +49,6 @@ config X86 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 - select ARCH_WANT_OPTIONAL_GPIOLIB select BUILDTIME_EXTABLE_SORT select CLKEVT_I8253 select CLKSRC_I8253 if X86_32 @@ -643,7 +642,7 @@ config STA2X11 select X86_DMA_REMAP select SWIOTLB select MFD_STA2X11 - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB default n ---help--- This adds support for boards based on the STA2X11 IO-Hub, -- cgit v1.2.3 From 7faf90ef995ea470f32f43810266ece8ac8ba6c7 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 3 Mar 2016 13:01:40 +0100 Subject: lguest: Read length of device_cap later Read the length of the capability with type VIRTIO_PCI_CAP_DEVICE_CFG only when we're sure we're going to need it. Which is just before the check whether the virtio console actually has an emerg_wr field. 
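As a reading aid for the hunks below (not part of the patch): the offsetof() config-space reads walk the vendor capability layout from the virtio 1.0 spec, which the kernel of that era declared in include/uapi/linux/virtio_pci.h roughly as follows.

struct virtio_pci_cap {
	__u8 cap_vndr;		/* PCI_CAP_ID_VNDR */
	__u8 cap_next;		/* link to the next capability */
	__u8 cap_len;		/* length of this capability structure */
	__u8 cfg_type;		/* e.g. VIRTIO_PCI_CAP_DEVICE_CFG */
	__u8 bar;		/* which BAR the window lives in */
	__u8 padding[3];
	__le32 offset;		/* offset of the window within that BAR */
	__le32 length;		/* length of the window */
};

The patch simply postpones the read of the 'length' member until the emerg_wr sanity check actually needs it.
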
Signed-off-by: Paul Bolle Cc: Rusty Russell Cc: lguest@lists.ozlabs.org Link: http://lkml.kernel.org/r/1457006501-5377-2-git-send-email-pebolle@tiscali.nl Signed-off-by: Thomas Gleixner --- arch/x86/lguest/boot.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 3847e736702e..146d11c8cf78 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1258,7 +1258,7 @@ static void probe_pci_console(void) u8 vndr = read_pci_config_byte(0, 1, 0, cap); if (vndr == PCI_CAP_ID_VNDR) { u8 type, bar; - u32 offset, length; + u32 offset; type = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, cfg_type)); @@ -1266,15 +1266,12 @@ static void probe_pci_console(void) cap + offsetof(struct virtio_pci_cap, bar)); offset = read_pci_config(0, 1, 0, cap + offsetof(struct virtio_pci_cap, offset)); - length = read_pci_config(0, 1, 0, - cap + offsetof(struct virtio_pci_cap, length)); switch (type) { case VIRTIO_PCI_CAP_DEVICE_CFG: if (bar == 0) { device_cap = cap; device_offset = offset; - device_len = length; } break; case VIRTIO_PCI_CAP_PCI_CFG: @@ -1297,6 +1294,8 @@ static void probe_pci_console(void) * emerg_wr. If it doesn't support VIRTIO_CONSOLE_F_EMERG_WRITE * it should ignore the access. */ + device_len = read_pci_config(0, 1, 0, + device_cap + offsetof(struct virtio_pci_cap, length)); if (device_len < (offsetof(struct virtio_console_config, emerg_wr) + sizeof(u32))) { printk(KERN_ERR "lguest: console missing emerg_wr field\n"); -- cgit v1.2.3 From cf2cf0f50c14e86e04cda2c684357eed77922666 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 3 Mar 2016 13:01:41 +0100 Subject: lguest: Read offset of device_cap later Read the offset of the capability with type VIRTIO_PCI_CAP_DEVICE_CFG only when we're sure we're going to need it. Which is when all checks have passed and we know we have a virtio console with an emerg_wr field. Signed-off-by: Paul Bolle Cc: Rusty Russell Cc: lguest@lists.ozlabs.org Link: http://lkml.kernel.org/r/1457006501-5377-3-git-send-email-pebolle@tiscali.nl Signed-off-by: Thomas Gleixner --- arch/x86/lguest/boot.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 146d11c8cf78..25da5bc8d83d 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1233,8 +1233,6 @@ static void write_bar_via_cfg(u32 cfg_offset, u32 off, u32 val) static void probe_pci_console(void) { u8 cap, common_cap = 0, device_cap = 0; - /* Offset within BAR0 */ - u32 device_offset; u32 device_len; /* Avoid recursive printk into here. 
*/ @@ -1258,21 +1256,16 @@ static void probe_pci_console(void) u8 vndr = read_pci_config_byte(0, 1, 0, cap); if (vndr == PCI_CAP_ID_VNDR) { u8 type, bar; - u32 offset; type = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, cfg_type)); bar = read_pci_config_byte(0, 1, 0, cap + offsetof(struct virtio_pci_cap, bar)); - offset = read_pci_config(0, 1, 0, - cap + offsetof(struct virtio_pci_cap, offset)); switch (type) { case VIRTIO_PCI_CAP_DEVICE_CFG: - if (bar == 0) { + if (bar == 0) device_cap = cap; - device_offset = offset; - } break; case VIRTIO_PCI_CAP_PCI_CFG: console_access_cap = cap; @@ -1302,7 +1295,8 @@ static void probe_pci_console(void) return; } - console_cfg_offset = device_offset; + console_cfg_offset = read_pci_config(0, 1, 0, + device_cap + offsetof(struct virtio_pci_cap, offset)); printk(KERN_INFO "lguest: Console via virtio-pci emerg_wr\n"); } -- cgit v1.2.3 From 4855531eb8582a98cb905d2baf86021254d7a675 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 8 Jun 2016 14:59:53 +0800 Subject: x86/ioapic: Simplify ioapic_setup_resources() Optimize the function by removing the variable 'num'. Signed-off-by: Rui Wang Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1465369193-4816-4-git-send-email-rui.y.wang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..e58729597a7a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2567,29 +2567,25 @@ static struct resource * __init ioapic_setup_resources(void) unsigned long n; struct resource *res; char *mem; - int i, num = 0; + int i; - for_each_ioapic(i) - num++; - if (num == 0) + if (nr_ioapics == 0) return NULL; n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= num; + n *= nr_ioapics; mem = alloc_bootmem(n); res = (void *)mem; - mem += sizeof(struct resource) * num; + mem += sizeof(struct resource) * nr_ioapics; - num = 0; for_each_ioapic(i) { - res[num].name = mem; - res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; - ioapics[i].iomem_res = &res[num]; - num++; + ioapics[i].iomem_res = &res[i]; } ioapic_resources = res; -- cgit v1.2.3 From 3c8fad9183ab7b3b3471fd2bb3d604104dd447cb Mon Sep 17 00:00:00 2001 From: Claudio Fontana Date: Thu, 9 Jun 2016 12:31:58 +0200 Subject: x86/apic: Fix misspelled APIC Signed-off-by: Claudio Fontana Signed-off-by: Thomas Gleixner Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1465468318-19867-1-git-send-email-hw.claudio@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 60078a67d7e3..f943d2f453a4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2045,7 +2045,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus - 1; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i almost" + "APIC: NR_CPUS/possible_cpus limit of %i almost" " reached. Keeping one slot for boot cpu." 
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid); @@ -2057,7 +2057,7 @@ int generic_processor_info(int apicid, int version) int thiscpu = max + disabled_cpus; pr_warning( - "ACPI: NR_CPUS/possible_cpus limit of %i reached." + "APIC: NR_CPUS/possible_cpus limit of %i reached." " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; @@ -2085,7 +2085,7 @@ int generic_processor_info(int apicid, int version) if (topology_update_package_map(apicid, cpu) < 0) { int thiscpu = max + disabled_cpus; - pr_warning("ACPI: Package limit reached. Processor %d/0x%x ignored.\n", + pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", thiscpu, apicid); disabled_cpus++; return -ENOSPC; -- cgit v1.2.3 From 99158f10e91768d34c5004c40c42f802b719bcae Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 24 May 2016 15:48:38 -0700 Subject: x86/xen: Simplify set_aliased_prot() A year ago, via the following commit: aa1acff356bb ("x86/xen: Probe target addresses in set_aliased_prot() before the hypercall") I added an explicit probe to work around a hypercall issue. The code can be simplified by using probe_kernel_read(). No change in functionality. Signed-off-by: Andy Lutomirski Reviewed-by: Andrew Cooper Acked-by: David Vrabel Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: xen-devel Link: http://lkml.kernel.org/r/0706f1a2538e481194514197298cca6b5e3f2638.1464129798.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 760789ae8562..0f87db2cc6a8 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -521,9 +521,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - pagefault_disable(); /* Avoid warnings due to being atomic. */ - __get_user(dummy, (unsigned char __user __force *)v); - pagefault_enable(); + probe_kernel_read(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); -- cgit v1.2.3 From b2de43605410d1970dc9e0f349e399f1d561be13 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 27 May 2016 14:11:06 -0700 Subject: x86/mce: Do not use bank 1 for APEI generated error logs BIOS can report a memory error to Linux using ACPI/APEI mechanism. When it does this, we create a fictitious machine check error record and feed it into the standard mce_log() function. The error record needs a machine check bank number, and for some reason we chose "1" for this. But "1" is a valid bank number, and this causes confusion and heartburn among h/w folks who are concerned that a memory error signature was somehow logged in bank 1. Change to use "-1" (field is a "u8" so will typically print as 255). This should make it clearer that this error did not originate in a machine check bank. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aristeu Rozanski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac Link: http://lkml.kernel.org/r/b7fffb2b326bc1dd150ffceb9919a803f9496e0e.1464805958.git.tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 34c89a3e8260..83f1a98d37db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -46,7 +46,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) return; mce_setup(&m); - m.bank = 1; + m.bank = -1; /* Fake a memory read error with unknown channel */ m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; -- cgit v1.2.3 From 2348140d58f4f4245e9635ea8f1a77e940a4d877 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 13 Jun 2016 18:32:44 +0800 Subject: KVM: Fix steal clock warp during guest CPU hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes, after CPU hotplug you can observe a spike in stolen time (100%) followed by the CPU being marked as 100% idle when it's actually busy with a CPU hog task. The trace looks like the following: cpuhp/1-12 [001] d.h1 167.461657: account_process_tick: steal = 1291385514, prev_steal_time = 0 cpuhp/1-12 [001] d.h1 167.461659: account_process_tick: steal_jiffies = 1291 -0 [001] d.h1 167.462663: account_process_tick: steal = 18732255, prev_steal_time = 1291000000 -0 [001] d.h1 167.462664: account_process_tick: steal_jiffies = 18446744072437 The sudden decrease of "steal" causes steal_jiffies to underflow. The root cause is kvm_steal_time being reset to 0 after hot-plugging back in a CPU. Instead, the preexisting value can be used, which is what the core scheduler code expects. John Stultz also reported a similar issue after guest S3. Suggested-by: Paolo Bonzini Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paolo Bonzini Cc: Frederic Weisbecker Cc: John Stultz Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465813966-3116-2-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index eea2a6f72b31..1ef5e48b3a36 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -301,8 +301,6 @@ static void kvm_register_steal_time(void) if (!has_steal_clock) return; - memset(st, 0, sizeof(*st)); - wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); pr_info("kvm-stealtime: cpu %d, msr %llx\n", cpu, (unsigned long long) slow_virt_to_phys(st)); -- cgit v1.2.3 From 281ee056e3f27d925350d65e5eb504b1320d7d5a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 31 May 2016 16:25:27 -0500 Subject: perf/x86/intel/uncore: Remove redundant pci_get_drvdata() Remove redundant pci_get_drvdata() call. There's another call a few lines down, just before we test "box" for NULL. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Link: http://lkml.kernel.org/r/20160531212527.28718.92371.stgit@bhelgaas-glaptop2.roam.corp.google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 4e70d2721249..dc965d2cf076 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -974,7 +974,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id static void uncore_pci_remove(struct pci_dev *pdev) { - struct intel_uncore_box *box = pci_get_drvdata(pdev); + struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; int i, phys_id, pkg; -- cgit v1.2.3 From 2c95afc1e83d93fac3be6923465e1753c2c53b0a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Jun 2016 06:14:38 -0700 Subject: perf/x86/intel, watchdog: Switch NMI watchdog to ref cycles on x86 The NMI watchdog uses either the fixed cycles or a generic cycles counter. This causes a lot of conflicts with users of the PMU who want to run a full group including the cycles fixed counter, for example the --topdown support recently added to perf stat. The code needs to fall back to not use groups, which can cause measurement inaccuracy due to multiplexing errors. This patch switches the NMI watchdog to use reference cycles on Intel systems. This is actually more accurate than cycles, because cycles can tick faster than the measured CPU Frequency due to Turbo mode. The ref cycles always tick at their frequency, or slower when the system is idling. That means the NMI watchdog can never expire too early, unlike with cycles. The reference cycles tick roughly at the frequency of the TSC, so the same period computation can be used. 
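As a quick sanity check on that last point (illustrative numbers only, not from the changelog), plug a nominal part into the hw_nmi_get_sample_period() helper visible in the diff below:

/*
 * hw_nmi_get_sample_period() still returns
 *	(u64)cpu_khz * 1000 * watchdog_thresh
 * For a nominal 2.5 GHz CPU (cpu_khz == 2,500,000) and the usual
 * watchdog_thresh of 10:
 *
 *	2,500,000 * 1000 * 10 = 25,000,000,000 reference cycles
 *
 * Reference cycles tick at roughly the TSC rate, so the hardlockup
 * counter still overflows after about 10 seconds of uninterrupted
 * execution; the difference is that Turbo mode can no longer make
 * it overflow early, as it could with plain CPU cycles.
 */
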
Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Cc: jolsa@kernel.org Link: http://lkml.kernel.org/r/1465478079-19993-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 8 ++++++++ include/linux/nmi.h | 1 + kernel/watchdog.c | 7 +++++++ 3 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 7788ce643bf4..016f4263fad4 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,8 +18,16 @@ #include #include #include +#include #ifdef CONFIG_HARDLOCKUP_DETECTOR +int hw_nmi_get_event(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return PERF_COUNT_HW_REF_CPU_CYCLES; + return PERF_COUNT_HW_CPU_CYCLES; +} + u64 hw_nmi_get_sample_period(int watchdog_thresh) { return (u64)(cpu_khz) * 1000 * watchdog_thresh; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4630eeae18e0..79858af27209 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,6 +66,7 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); +int hw_nmi_get_event(void); extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 9acb29f280ec..8dd30fcd91be 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -315,6 +315,12 @@ static int is_softlockup(unsigned long touch_ts) #ifdef CONFIG_HARDLOCKUP_DETECTOR +/* Can be overriden by architecture */ +__weak int hw_nmi_get_event(void) +{ + return PERF_COUNT_HW_CPU_CYCLES; +} + static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -604,6 +610,7 @@ static int watchdog_nmi_enable(unsigned int cpu) wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); + wd_attr->config = hw_nmi_get_event(); /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); -- cgit v1.2.3 From b464d1270a8016edcf1fd20d77cefdecf9b0b73e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2016 14:32:04 +0200 Subject: locking/barriers, tile: Provide TILE specific smp_acquire__after_ctrl_dep() Since TILE doesn't do read speculation, its control dependencies also guarantee LOAD->LOAD order and we don't need the additional RMB otherwise required to provide ACQUIRE semantics. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/barrier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 726328d92a42b6d4b76078e2659f43067f82c4e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 May 2016 10:35:03 +0200 Subject: locking/spinlock, arch: Update and fix spin_unlock_wait() implementations This patch updates/fixes all spin_unlock_wait() implementations. The update is in semantics; where it previously was only a control dependency, we now upgrade to a full load-acquire to match the store-release from the spin_unlock() we waited on. This ensures that when spin_unlock_wait() returns, we're guaranteed to observe the full critical section we waited on. This fixes a number of spin_unlock_wait() users that (not unreasonably) rely on this. I also fixed a number of ticket lock versions to only wait on the current lock holder, instead of for a full unlock, as this is sufficient. Furthermore; again for ticket locks; I added an smp_rmb() in between the initial ticket load and the spin loop testing the current value because I could not convince myself the address dependency is sufficient, esp. if the loads are of different sizes. I'm more than happy to remove this smp_rmb() again if people are certain the address dependency does indeed work as expected. Note: PPC32 will be fixed independently Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: chris@zankel.net Cc: cmetcalf@mellanox.com Cc: davem@davemloft.net Cc: dhowells@redhat.com Cc: james.hogan@imgtec.com Cc: jejb@parisc-linux.org Cc: linux@armlinux.org.uk Cc: mpe@ellerman.id.au Cc: ralf@linux-mips.org Cc: realmz6@gmail.com Cc: rkuo@codeaurora.org Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Cc: vgupta@synopsys.com Cc: ysato@users.sourceforge.jp Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/spinlock.h | 9 +++++++-- arch/arc/include/asm/spinlock.h | 7 +++++-- arch/arm/include/asm/spinlock.h | 19 +++++++++++++++++-- arch/blackfin/include/asm/spinlock.h | 5 +++-- arch/hexagon/include/asm/spinlock.h | 10 ++++++++-- arch/ia64/include/asm/spinlock.h | 4 ++++ arch/m32r/include/asm/spinlock.h | 9 +++++++-- arch/metag/include/asm/spinlock.h | 14 ++++++++++++-- arch/mips/include/asm/spinlock.h | 19 +++++++++++++++++-- arch/mn10300/include/asm/spinlock.h | 8 +++++++- arch/parisc/include/asm/spinlock.h | 9 +++++++-- arch/s390/include/asm/spinlock.h | 3 +++ arch/sh/include/asm/spinlock.h | 10 ++++++++-- arch/sparc/include/asm/spinlock_32.h | 7 +++++-- arch/sparc/include/asm/spinlock_64.h | 10 +++++++--- arch/tile/lib/spinlock_32.c | 6 ++++++ arch/tile/lib/spinlock_64.c | 6 ++++++ arch/xtensa/include/asm/spinlock.h | 10 ++++++++-- include/asm-generic/barrier.h | 2 +- include/linux/spinlock_up.h | 10 +++++++--- 20 files changed, 145 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index fed9c6f44c19..a40b9fc0c6c3 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -3,6 +3,8 @@ #include #include +#include +#include /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -13,8 +15,11 @@ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while ((x)->lock) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline int arch_spin_value_unlocked(arch_spinlock_t lock) { diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index cded4a9b5438..233d5ffe6ec7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -15,8 +15,11 @@ #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #ifdef CONFIG_ARC_HAS_LLSC diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 0fa418463f49..4bec45442072 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -6,6 +6,8 @@ #endif #include +#include +#include /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K @@ -50,8 +52,21 @@ static inline void dsb_sev(void) * memory. 
*/ -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->tickets.owner); + + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.tickets.owner == tmp.tickets.next || + tmp.tickets.owner != owner) + break; + + wfe(); + } + smp_acquire__after_ctrl_dep(); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 490c7caa02d9..c58f4a83ed6f 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -12,6 +12,8 @@ #else #include +#include +#include asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); @@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { - while (arch_spin_is_locked(lock)) - cpu_relax(); + smp_cond_load_acquire(&lock->lock, !VAL); } static inline int arch_read_can_lock(arch_rwlock_t *rw) diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 12ca4ebc0338..a1c55788c5d6 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -23,6 +23,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include /* * This file is pulled in for SMP builds. @@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) * SMP spinlocks are intended to allow only a single CPU at the lock */ #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(lock) \ - do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} + #define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 45698cd15b7b..ca9e76149a4a 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -15,6 +15,8 @@ #include #include +#include +#include #define arch_spin_lock_init(x) ((x)->lock = 0) @@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) return; cpu_relax(); } + + smp_acquire__after_ctrl_dep(); } static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index fa13694eaae3..323c7fc953cd 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include /* * Your basic SMP spinlocks, allowing only a single CPU anywhere @@ -27,8 +29,11 @@ #define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL > 0); +} /** * arch_spin_trylock - Try spin lock and return a result diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h index 86a7cf3d1386..c0c7a22be1ae 100644 --- a/arch/metag/include/asm/spinlock.h +++ b/arch/metag/include/asm/spinlock.h @@ -1,14 +1,24 @@ #ifndef 
__ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include +#include + #ifdef CONFIG_METAG_ATOMICITY_LOCK1 #include #else #include #endif -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +/* + * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1 + * locked. + */ + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 40196bebe849..f485afe51514 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) } #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - while (arch_spin_is_locked(x)) { cpu_relax(); } + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + u16 owner = READ_ONCE(lock->h.serving_now); + smp_rmb(); + for (;;) { + arch_spinlock_t tmp = READ_ONCE(*lock); + + if (tmp.h.serving_now == tmp.h.ticket || + tmp.h.serving_now != owner) + break; + + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} static inline int arch_spin_is_contended(arch_spinlock_t *lock) { diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 1ae580f38933..9c7b8f7942d8 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -12,6 +12,8 @@ #define _ASM_SPINLOCK_H #include +#include +#include #include #include @@ -23,7 +25,11 @@ */ #define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0) -#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} static inline void arch_spin_unlock(arch_spinlock_t *lock) { diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 64f2992e439f..e32936cd7f10 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x) } #define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0) -#define arch_spin_unlock_wait(x) \ - do { cpu_relax(); } while (arch_spin_is_locked(x)) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *x) +{ + volatile unsigned int *a = __ldcw_align(x); + + smp_cond_load_acquire(a, VAL); +} static inline void arch_spin_lock_flags(arch_spinlock_t *x, unsigned long flags) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 63ebf37d3143..7e9e09f600fa 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -10,6 +10,8 @@ #define __ASM_SPINLOCK_H #include +#include +#include #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval) @@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { while (arch_spin_is_locked(lock)) arch_spin_relax(lock); + smp_acquire__after_ctrl_dep(); } /* diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h index bdc0f3b6c56a..416834b60ad0 100644 --- a/arch/sh/include/asm/spinlock.h +++ b/arch/sh/include/asm/spinlock.h @@ -19,14 +19,20 @@ #error "Need movli.l/movco.l for spinlocks" #endif +#include +#include + /* * Your basic SMP spinlocks, 
allowing only a single CPU anywhere */ #define arch_spin_is_locked(x) ((x)->lock <= 0) #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) -#define arch_spin_unlock_wait(x) \ - do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, VAL > 0); +} /* * Simple spin lock operations. There are two variants, one clears IRQ's diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index bcc98fc35281..d9c5876c6121 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -9,12 +9,15 @@ #ifndef __ASSEMBLY__ #include +#include #include /* for cpu_relax */ #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 968917694978..87990b7c6b0d 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ +#include +#include + /* To get debugging spinlocks which detect and catch * deadlock situations, set CONFIG_DEBUG_SPINLOCK * and rebuild your kernel. @@ -23,9 +26,10 @@ #define arch_spin_is_locked(lp) ((lp)->lock != 0) -#define arch_spin_unlock_wait(lp) \ - do { rmb(); \ - } while((lp)->lock) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->lock, !VAL); +} static inline void arch_spin_lock(arch_spinlock_t *lock) { diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 88c2a53362e7..076c6cc43113 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (READ_ONCE(lock->current_ticket) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. + */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c index c8d1f94ff1fe..a4b5b2cbce93 100644 --- a/arch/tile/lib/spinlock_64.c +++ b/arch/tile/lib/spinlock_64.c @@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock) do { delay_backoff(iterations++); } while (arch_spin_current(READ_ONCE(lock->lock)) == curr); + + /* + * The TILE architecture doesn't do read speculation; therefore + * a control dependency guarantees a LOAD->{LOAD,STORE} order. 
+ */ + barrier(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h index 1d95fa5dcd10..a36221cf6363 100644 --- a/arch/xtensa/include/asm/spinlock.h +++ b/arch/xtensa/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _XTENSA_SPINLOCK_H #define _XTENSA_SPINLOCK_H +#include +#include + /* * spinlock * @@ -29,8 +32,11 @@ */ #define arch_spin_is_locked(x) ((x)->slock != 0) -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) + +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, !VAL); +} #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index ab7b0bd7d4dd..fe297b599b0a 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -194,7 +194,7 @@ do { \ }) #endif -#endif +#endif /* CONFIG_SMP */ /* Barriers for virtual machine guests when talking to an SMP host */ #define virt_mb() __smp_mb() diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 8b3ac0d718eb..0d9848de677d 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -6,6 +6,7 @@ #endif #include /* for cpu_relax() */ +#include /* * include/linux/spinlock_up.h - UP-debug version of spinlocks. @@ -25,6 +26,11 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define arch_spin_is_locked(x) ((x)->slock == 0) +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + smp_cond_load_acquire(&lock->slock, VAL); +} + static inline void arch_spin_lock(arch_spinlock_t *lock) { lock->slock = 0; @@ -67,6 +73,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) +#define arch_spin_unlock_wait(lock) do { barrier(); (void)(lock); } while (0) /* for sched/core.c and kernel_lock.c: */ # define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0) # define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0) @@ -79,7 +86,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_read_can_lock(lock) (((void)(lock), 1)) #define arch_write_can_lock(lock) (((void)(lock), 1)) -#define arch_spin_unlock_wait(lock) \ - do { cpu_relax(); } while (arch_spin_is_locked(lock)) - #endif /* __LINUX_SPINLOCK_UP_H */ -- cgit v1.2.3 From f0702555b16d31d61dc758fac6efb994c3fe3ec6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 9 Jun 2016 13:57:04 -0700 Subject: x86/vdso/32: Assemble sigreturn.S separately sigreturn.S was historically included by the various __kernel_vsyscall implementations due to assumptions about all the 32-bit vDSO images having the sigreturn symbols at the same address. Those assumptions were removed in v3.16, and as of v4.4, there is only a single 32-bit vDSO left. Simplify the build process by assembling sigreturn.S into a normal object file. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d7b6dfde3c7397aa26977320da90448363b5a7e9.1465505753.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/Makefile | 5 +++-- arch/x86/entry/vdso/vdso32/sigreturn.S | 8 -------- arch/x86/entry/vdso/vdso32/system_call.S | 7 +------ 3 files changed, 4 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72eaade6..68b63fddc209 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o +targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO @@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ - $(obj)/vdso32/system_call.o + $(obj)/vdso32/system_call.o \ + $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) # diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index d7ec4e251c0a..20633e026e82 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,11 +1,3 @@ -/* - * Common code for the sigreturn entry points in vDSO images. - * So far this code is the same for both int80 and sysenter versions. - * This file is #include'd by int80.S et al to define them first thing. - * The kernel assumes that the addresses of these routines are constant - * for all vDSO implementations. - */ - #include #include #include diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 0109ac6cb79c..ed4bc9731cbb 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -2,16 +2,11 @@ * AT_SYSINFO entry point */ +#include #include #include #include -/* - * First get the common code for the sigreturn entry points. - * This must come first. - */ -#include "sigreturn.S" - .text .globl __kernel_vsyscall .type __kernel_vsyscall,@function -- cgit v1.2.3 From a4455082dc6f0b5d51a23523f77600e8ede47c79 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:33 -0700 Subject: x86/signals: Add missing signal_compat code for x86 features The 32-bit siginfo is a different binary format than the 64-bit one. So, when running 32-bit binaries on 64-bit kernels, we have to convert the kernel's 64-bit version to a 32-bit version that userspace can grok. We've added a few features to siginfo over the past few years and neglected to add them to arch/x86/kernel/signal_compat.c: 1. The si_addr_lsb used in SIGBUS's sent for machine checks 2. The upper/lower bounds for MPX SIGSEGV faults 3. The protection key for pkey faults I caught this with some protection keys unit tests and realized it affected a few more features. This was tested only with my protection keys patch that looks for a proper value in si_pkey. I didn't actually test the machine check or MPX code. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20160608172533.F8F05637@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/compat.h | 11 +++++++++++ arch/x86/kernel/signal_compat.c | 15 +++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 5a3b2c119ed0..a18806165fe4 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -40,6 +40,7 @@ typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; +typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; typedef u32 compat_uptr_t; @@ -181,6 +182,16 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ + short int _addr_lsb; /* Valid LSB of the reported address. */ + union { + /* used when si_code=SEGV_BNDERR */ + struct { + compat_uptr_t _lower; + compat_uptr_t _upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + compat_u32 _pkey; + }; } _sigfault; /* SIGPOLL */ diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index dc3c0b1c816f..5335ad96a290 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -32,6 +32,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || + from->si_code == BUS_MCEERR_AO)) + put_user_ex(from->si_addr_lsb, &to->si_addr_lsb); + + if (from->si_signo == SIGSEGV) { + if (from->si_code == SEGV_BNDERR) { + compat_uptr_t lower = (unsigned long)&to->si_lower; + compat_uptr_t upper = (unsigned long)&to->si_upper; + put_user_ex(lower, &to->si_lower); + put_user_ex(upper, &to->si_upper); + } + if (from->si_code == SEGV_PKUERR) + put_user_ex(from->si_pkey, &to->si_pkey); + } break; case __SI_SYS >> 16: put_user_ex(from->si_syscall, &to->si_syscall); -- cgit v1.2.3 From 02e8fda2cc00419a11cf38199afea4c0d7172be8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 8 Jun 2016 10:25:34 -0700 Subject: x86/signals: Add build-time checks to the siginfo compat code There were at least 3 features added to the __SI_FAULT area of the siginfo struct that did not make it to the compat siginfo: 1. The si_addr_lsb used in SIGBUS's sent for machine checks 2. The upper/lower bounds for MPX SIGSEGV faults 3. The protection key for pkey faults There was also some turmoil when I was attempting to add the pkey field because it needs to be a fixed size on 32 and 64-bit and not have any alignment constraints. This patch adds some compile-time checks to the compat code to make it harder to screw this up. Basically, the checks are supposed to trip any time someone changes the siginfo structure. That sounds bad, but it's what we want. If someone changes siginfo, we want them to also be _forced_ to go look at the compat code. The details are in the comments. Signed-off-by: Dave Hansen Cc: Al Viro Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20160608172534.C73DAFC3@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_compat.c | 93 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 5335ad96a290..b44564bf86a8 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,11 +1,104 @@ #include #include +/* + * The compat_siginfo_t structure and handing code is very easy + * to break in several ways. It must always be updated when new + * updates are made to the main siginfo_t, and + * copy_siginfo_to_user32() must be updated when the + * (arch-independent) copy_siginfo_to_user() is updated. + * + * It is also easy to put a new member in the compat_siginfo_t + * which has implicit alignment which can move internal structure + * alignment around breaking the ABI. This can happen if you, + * for instance, put a plain 64-bit value in there. + */ +static inline void signal_compat_build_tests(void) +{ + int _sifields_offset = offsetof(compat_siginfo_t, _sifields); + + /* + * If adding a new si_code, there is probably new data in + * the siginfo. Make sure folks bumping the si_code + * limits also have to look at this code. Make sure any + * new fields are handled in copy_siginfo_to_user32()! + */ + BUILD_BUG_ON(NSIGILL != 8); + BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGBUS != 5); + BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGCHLD != 6); + BUILD_BUG_ON(NSIGSYS != 1); + + /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + /* + * The offsets of all the (unioned) si_fields are fixed + * in the ABI, of course. Make sure none of them ever + * move and are always at the beginning: + */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); +#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + + /* + * Ensure that the size of each si_field never changes. + * If it does, it is a sign that the + * copy_siginfo_to_user32() code below needs to updated + * along with the size in the CHECK_SI_SIZE(). + * + * We repeat this check for both the generic and compat + * siginfos. + * + * Note: it is OK for these to grow as long as the whole + * structure stays within the padding size (checked + * above). 
+ */ +#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name)) +#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name)) + + CHECK_CSI_OFFSET(_kill); + CHECK_CSI_SIZE (_kill, 2*sizeof(int)); + CHECK_SI_SIZE (_kill, 2*sizeof(int)); + + CHECK_CSI_OFFSET(_timer); + CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_SI_SIZE (_timer, 6*sizeof(int)); + + CHECK_CSI_OFFSET(_rt); + CHECK_CSI_SIZE (_rt, 3*sizeof(int)); + CHECK_SI_SIZE (_rt, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld); + CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); + CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld_x32); + CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); + /* no _sigchld_x32 in the generic siginfo_t */ + + CHECK_CSI_OFFSET(_sigfault); + CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); + CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigpoll); + CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); + CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigsys); + CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); + CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + + /* any new si_fields should be added here */ +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err = 0; bool ia32 = test_thread_flag(TIF_IA32); + signal_compat_build_tests(); + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; -- cgit v1.2.3 From bb27570525a71f48347ed0e0c265063e7952bb61 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Jun 2016 21:28:00 +0300 Subject: x86/platform/intel_mid_pci: Rework IRQ0 workaround On Intel Merrifield platform several PCI devices have a bogus configuration, i.e. the IRQ0 had been assigned to few of them. These are PCI root bridge, eMMC0, HS UART common registers, PWM, and HDMI. The actual interrupt line can be allocated to one device exclusively, in our case to eMMC0, the rest should cope without it and basically known drivers for them are not using interrupt line at all. Rework IRQ0 workaround, which was previously done to avoid conflict between eMMC0 and HS UART common registers, to behave differently based on the device in question, i.e. allocate interrupt line to eMMC0, but silently skip interrupt allocation for the rest except HS UART common registers which are not used anyway. With this rework IOSF MBI driver in particular would be used. Signed-off-by: Andy Shevchenko Acked-by: Thomas Gleixner Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: 39d9b77b8deb ("x86/pci/intel_mid_pci: Work around for IRQ0 assignment") Link: http://lkml.kernel.org/r/1465842481-136852-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 8b93e634af84..ae97f24a4371 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -37,6 +37,7 @@ /* Quirks for the listed devices */ #define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_MRFL_HSU 0x1191 /* Fixed BAR fields */ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ @@ -224,14 +225,21 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) /* Special treatment for IRQ0 */ if (dev->irq == 0) { + /* + * Skip HS UART common registers device since it has + * IRQ0 assigned and not used by the kernel. 
+ */ + if (dev->device == PCI_DEVICE_ID_INTEL_MRFL_HSU) + return -EBUSY; /* * TNG has IRQ0 assigned to eMMC controller. But there * are also other devices with bogus PCI configuration * that have IRQ0 assigned. This check ensures that - * eMMC gets it. + * eMMC gets it. The rest of devices still could be + * enabled without interrupt line being allocated. */ if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC) - return -EBUSY; + return 0; } break; default: -- cgit v1.2.3 From 9485f8b6a75921e1b9e94b001cdb45872a598534 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 13 Jun 2016 21:28:01 +0300 Subject: x86/platform/atom/punit: Enable support for Merrifield Intel Merrifield platform has Punit generation that somehow compatible to what is already supported by punit_atom_debug driver. Add necessary bits to enable that support. Signed-off-by: Andy Shevchenko Acked-by: Thomas Gleixner Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1465842481-136852-2-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/atom/punit_atom_debug.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 109782996867..8ff7b9355416 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -26,8 +26,6 @@ #include #include -/* Power gate status reg */ -#define PWRGT_STATUS 0x61 /* Subsystem config/status Video processor */ #define VED_SS_PM0 0x32 /* Subsystem config/status ISP (Image Signal Processor) */ @@ -36,12 +34,16 @@ #define MIO_SS_PM 0x3B /* Shift bits for getting status for video, isp and i/o */ #define SSS_SHIFT 24 + +/* Power gate status reg */ +#define PWRGT_STATUS 0x61 /* Shift bits for getting status for graphics rendering */ #define RENDER_POS 0 /* Shift bits for getting status for media control */ #define MEDIA_POS 2 /* Shift bits for getting status for Valley View/Baytrail display */ #define VLV_DISPLAY_POS 6 + /* Subsystem config/status display for Cherry Trail SOC */ #define CHT_DSP_SSS 0x36 /* Shift bits for getting status for display */ @@ -53,6 +55,14 @@ struct punit_device { int sss_pos; }; +static const struct punit_device punit_device_tng[] = { + { "DISPLAY", CHT_DSP_SSS, SSS_SHIFT }, + { "VED", VED_SS_PM0, SSS_SHIFT }, + { "ISP", ISP_SS_PM0, SSS_SHIFT }, + { "MIO", MIO_SS_PM, SSS_SHIFT }, + { NULL } +}; + static const struct punit_device punit_device_byt[] = { { "GFX RENDER", PWRGT_STATUS, RENDER_POS }, { "GFX MEDIA", PWRGT_STATUS, MEDIA_POS }, @@ -145,6 +155,7 @@ static void punit_dbgfs_unregister(void) static const struct x86_cpu_id intel_punit_cpu_ids[] = { ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), + ICPU(INTEL_FAM6_ATOM_MERRIFIELD1, punit_device_tng), ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), {} }; -- cgit v1.2.3 From 5823d0893ec284f37902e2ecd332dbb396a143d1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 14 Jun 2016 21:29:45 +0300 Subject: x86/platform/intel-mid: Add Power Management Unit driver Add Power Management Unit driver to handle power states of South Complex devices on Intel Tangier. In the future it might be expanded to cover North Complex devices as well. With this driver the power state of the host controllers such as SPI, I2C, UART, eMMC, and DMA would be managed. 
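For a driver sitting on one of these South Complex devices nothing changes at the source level; a plain PCI power-state request is simply routed through the new platform hooks. A minimal sketch (the example_suspend() helper is hypothetical, the PCI calls are the stock ones):

/*
 * Hypothetical suspend path for an LSS-mapped device: the ordinary
 * pci_set_power_state() call is intercepted by the pci_platform_pm_ops
 * registered in drivers/pci/pci-mid.c, which forwards it to
 * intel_mid_pci_set_power_state() and lets the PWRMU gate the LSS.
 */
static int example_suspend(struct pci_dev *pdev)
{
	pci_save_state(pdev);
	return pci_set_power_state(pdev, PCI_D3hot);
}
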
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: David Cohen Cc: Linus Torvalds Cc: Mika Westerberg Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1465928985-12113-1-git-send-email-andriy.shevchenko@linux.intel.com [ Minor readability edits. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 8 + arch/x86/pci/intel_mid_pci.c | 40 +++- arch/x86/platform/intel-mid/Makefile | 2 +- arch/x86/platform/intel-mid/pwr.c | 416 +++++++++++++++++++++++++++++++++++ drivers/pci/Makefile | 3 + drivers/pci/pci-mid.c | 77 +++++++ 6 files changed, 540 insertions(+), 6 deletions(-) create mode 100644 arch/x86/platform/intel-mid/pwr.c create mode 100644 drivers/pci/pci-mid.c (limited to 'arch') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 7c5af123bdbd..38498a4fb44f 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -12,9 +12,17 @@ #define _ASM_X86_INTEL_MID_H #include +#include #include extern int intel_mid_pci_init(void); +extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); + +#define INTEL_MID_PWR_LSS_OFFSET 4 +#define INTEL_MID_PWR_LSS_TYPE (1 << 7) + +extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); + extern int get_gpio_by_name(const char *name); extern void intel_scu_device_register(struct platform_device *pdev); extern int __init sfi_parse_mrtc(struct sfi_table_header *table); diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index ae97f24a4371..a9710433be4d 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -316,14 +316,44 @@ static void pci_d3delay_fixup(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); -static void mrst_power_off_unused_dev(struct pci_dev *dev) +static void mid_power_off_dev(struct pci_dev *dev) { + u16 pmcsr; + + /* + * Update current state first, otherwise PCI core enforces PCI_D0 in + * pci_set_power_state() for devices which status was PCI_UNKNOWN. + */ + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pci_power_t __force)(pmcsr & PCI_PM_CTRL_STATE_MASK); + pci_set_power_state(dev, PCI_D3hot); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mid_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mid_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mid_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mid_power_off_dev); + +static void mrfld_power_off_dev(struct pci_dev *dev) +{ + int id; + + if (!pci_soc_mode) + return; + + id = intel_mid_pwr_get_lss_id(dev); + if (id < 0) + return; + + /* + * This sets only PMCSR bits. The actual power off will happen in + * arch/x86/platform/intel-mid/pwr.c. + */ + mid_power_off_dev(dev); +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mrfld_power_off_dev); /* * Langwell devices reside at fixed offsets, don't try to move them. 
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 0ce1b1913673..aebb5b9ea80a 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o pwr.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c new file mode 100644 index 000000000000..59faf05d23f5 --- /dev/null +++ b/arch/x86/platform/intel-mid/pwr.c @@ -0,0 +1,416 @@ +/* + * Intel MID Power Management Unit (PWRMU) device driver + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * Intel MID Power Management Unit device driver handles the South Complex PCI + * devices such as GPDMA, SPI, I2C, PWM, and so on. By default PCI core + * modifies bits in PMCSR register in the PCI configuration space. This is not + * enough on some SoCs like Intel Tangier. In such case PCI core sets a new + * power state of the device in question through a PM hook registered in struct + * pci_platform_pm_ops (see drivers/pci/pci-mid.c). + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Registers */ +#define PM_STS 0x00 +#define PM_CMD 0x04 +#define PM_ICS 0x08 +#define PM_WKC(x) (0x10 + (x) * 4) +#define PM_WKS(x) (0x18 + (x) * 4) +#define PM_SSC(x) (0x20 + (x) * 4) +#define PM_SSS(x) (0x30 + (x) * 4) + +/* Bits in PM_STS */ +#define PM_STS_BUSY (1 << 8) + +/* Bits in PM_CMD */ +#define PM_CMD_CMD(x) ((x) << 0) +#define PM_CMD_IOC (1 << 8) +#define PM_CMD_D3cold (1 << 21) + +/* List of commands */ +#define CMD_SET_CFG 0x01 + +/* Bits in PM_ICS */ +#define PM_ICS_INT_STATUS(x) ((x) & 0xff) +#define PM_ICS_IE (1 << 8) +#define PM_ICS_IP (1 << 9) +#define PM_ICS_SW_INT_STS (1 << 10) + +/* List of interrupts */ +#define INT_INVALID 0 +#define INT_CMD_COMPLETE 1 +#define INT_CMD_ERR 2 +#define INT_WAKE_EVENT 3 +#define INT_LSS_POWER_ERR 4 +#define INT_S0iX_MSG_ERR 5 +#define INT_NO_C6 6 +#define INT_TRIGGER_ERR 7 +#define INT_INACTIVITY 8 + +/* South Complex devices */ +#define LSS_MAX_SHARED_DEVS 4 +#define LSS_MAX_DEVS 64 + +#define LSS_WS_BITS 1 /* wake state width */ +#define LSS_PWS_BITS 2 /* power state width */ + +/* Supported device IDs */ +#define PCI_DEVICE_ID_TANGIER 0x11a1 + +struct mid_pwr_dev { + struct pci_dev *pdev; + pci_power_t state; +}; + +struct mid_pwr { + struct device *dev; + void __iomem *regs; + int irq; + bool available; + + struct mutex lock; + struct mid_pwr_dev lss[LSS_MAX_DEVS][LSS_MAX_SHARED_DEVS]; +}; + +static struct mid_pwr *midpwr; + +static u32 mid_pwr_get_state(struct mid_pwr *pwr, int reg) +{ + return readl(pwr->regs + PM_SSS(reg)); +} + +static void mid_pwr_set_state(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_SSC(reg)); +} + +static void mid_pwr_set_wake(struct mid_pwr *pwr, int reg, u32 value) +{ + writel(value, pwr->regs + PM_WKC(reg)); +} + +static void mid_pwr_interrupt_disable(struct mid_pwr *pwr) +{ + writel(~PM_ICS_IE, pwr->regs + PM_ICS); +} + +static bool mid_pwr_is_busy(struct mid_pwr *pwr) +{ + return !!(readl(pwr->regs + PM_STS) & 
PM_STS_BUSY); +} + +/* Wait 500ms that the latest PWRMU command finished */ +static int mid_pwr_wait(struct mid_pwr *pwr) +{ + unsigned int count = 500000; + bool busy; + + do { + busy = mid_pwr_is_busy(pwr); + if (!busy) + return 0; + udelay(1); + } while (--count); + + return -EBUSY; +} + +static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd) +{ + writel(PM_CMD_CMD(cmd), pwr->regs + PM_CMD); + return mid_pwr_wait(pwr); +} + +static int __update_power_state(struct mid_pwr *pwr, int reg, int bit, int new) +{ + int curstate; + u32 power; + int ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate == new) + return 0; + + /* Update the power state */ + mid_pwr_set_state(pwr, reg, (power & ~(3 << bit)) | (new << bit)); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + /* Check if the device is already in desired state */ + power = mid_pwr_get_state(pwr, reg); + curstate = (power >> bit) & 3; + if (curstate != new) + return -EAGAIN; + + return 0; +} + +static pci_power_t __find_weakest_power_state(struct mid_pwr_dev *lss, + struct pci_dev *pdev, + pci_power_t state) +{ + pci_power_t weakest = PCI_D3hot; + unsigned int j; + + /* Find device in cache or first free cell */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].pdev == pdev || !lss[j].pdev) + break; + } + + /* Store the desired state in cache */ + if (j < LSS_MAX_SHARED_DEVS) { + lss[j].pdev = pdev; + lss[j].state = state; + } else { + dev_WARN(&pdev->dev, "No room for device in PWRMU LSS cache\n"); + weakest = state; + } + + /* Find the power state we may use */ + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) { + if (lss[j].state < weakest) + weakest = lss[j].state; + } + + return weakest; +} + +static int __set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state, int id, int reg, int bit) +{ + const char *name; + int ret; + + state = __find_weakest_power_state(pwr->lss[id], pdev, state); + name = pci_power_name(state); + + ret = __update_power_state(pwr, reg, bit, (__force int)state); + if (ret) { + dev_warn(&pdev->dev, "Can't set power state %s: %d\n", name, ret); + return ret; + } + + dev_vdbg(&pdev->dev, "Set power state %s\n", name); + return 0; +} + +static int mid_pwr_set_power_state(struct mid_pwr *pwr, struct pci_dev *pdev, + pci_power_t state) +{ + int id, reg, bit; + int ret; + + id = intel_mid_pwr_get_lss_id(pdev); + if (id < 0) + return id; + + reg = (id * LSS_PWS_BITS) / 32; + bit = (id * LSS_PWS_BITS) % 32; + + /* We support states between PCI_D0 and PCI_D3hot */ + if (state < PCI_D0) + state = PCI_D0; + if (state > PCI_D3hot) + state = PCI_D3hot; + + mutex_lock(&pwr->lock); + ret = __set_power_state(pwr, pdev, state, id, reg, bit); + mutex_unlock(&pwr->lock); + return ret; +} + +int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) +{ + struct mid_pwr *pwr = midpwr; + int ret = 0; + + might_sleep(); + + if (pwr && pwr->available) + ret = mid_pwr_set_power_state(pwr, pdev, state); + dev_vdbg(&pdev->dev, "set_power_state() returns %d\n", ret); + + return 0; +} +EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state); + +int intel_mid_pwr_get_lss_id(struct pci_dev *pdev) +{ + int vndr; + u8 id; + + /* + * Mapping to PWRMU index is kept in the Logical SubSystem ID byte of + * Vendor capability. 
+ */ + vndr = pci_find_capability(pdev, PCI_CAP_ID_VNDR); + if (!vndr) + return -EINVAL; + + /* Read the Logical SubSystem ID byte */ + pci_read_config_byte(pdev, vndr + INTEL_MID_PWR_LSS_OFFSET, &id); + if (!(id & INTEL_MID_PWR_LSS_TYPE)) + return -ENODEV; + + id &= ~INTEL_MID_PWR_LSS_TYPE; + if (id >= LSS_MAX_DEVS) + return -ERANGE; + + return id; +} + +static irqreturn_t mid_pwr_irq_handler(int irq, void *dev_id) +{ + struct mid_pwr *pwr = dev_id; + u32 ics; + + ics = readl(pwr->regs + PM_ICS); + if (!(ics & PM_ICS_IP)) + return IRQ_NONE; + + writel(ics | PM_ICS_IP, pwr->regs + PM_ICS); + + dev_warn(pwr->dev, "Unexpected IRQ: %#x\n", PM_ICS_INT_STATUS(ics)); + return IRQ_HANDLED; +} + +struct mid_pwr_device_info { + int (*set_initial_state)(struct mid_pwr *pwr); +}; + +static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct mid_pwr_device_info *info = (void *)id->driver_data; + struct device *dev = &pdev->dev; + struct mid_pwr *pwr; + int ret; + + ret = pcim_enable_device(pdev); + if (ret < 0) { + dev_err(&pdev->dev, "error: could not enable device\n"); + return ret; + } + + ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "I/O memory remapping failed\n"); + return ret; + } + + pwr = devm_kzalloc(dev, sizeof(*pwr), GFP_KERNEL); + if (!pwr) + return -ENOMEM; + + pwr->dev = dev; + pwr->regs = pcim_iomap_table(pdev)[0]; + pwr->irq = pdev->irq; + + mutex_init(&pwr->lock); + + /* Disable interrupts */ + mid_pwr_interrupt_disable(pwr); + + if (info && info->set_initial_state) { + ret = info->set_initial_state(pwr); + if (ret) + dev_warn(dev, "Can't set initial state: %d\n", ret); + } + + ret = devm_request_irq(dev, pdev->irq, mid_pwr_irq_handler, + IRQF_NO_SUSPEND, pci_name(pdev), pwr); + if (ret) + return ret; + + pwr->available = true; + midpwr = pwr; + + pci_set_drvdata(pdev, pwr); + return 0; +} + +static int tng_set_initial_state(struct mid_pwr *pwr) +{ + unsigned int i, j; + int ret; + + /* + * Enable wake events. + * + * PWRMU supports up to 32 sources for wake up the system. Ungate them + * all here. + */ + mid_pwr_set_wake(pwr, 0, 0xffffffff); + mid_pwr_set_wake(pwr, 1, 0xffffffff); + + /* + * Power off South Complex devices. + * + * There is a map (see a note below) of 64 devices with 2 bits per each + * on 32-bit HW registers. The following calls set all devices to one + * known initial state, i.e. PCI_D3hot. This is done in conjunction + * with PMCSR setting in arch/x86/pci/intel_mid_pci.c. + * + * NOTE: The actual device mapping is provided by a platform at run + * time using vendor capability of PCI configuration space. 
+ */ + mid_pwr_set_state(pwr, 0, 0xffffffff); + mid_pwr_set_state(pwr, 1, 0xffffffff); + mid_pwr_set_state(pwr, 2, 0xffffffff); + mid_pwr_set_state(pwr, 3, 0xffffffff); + + /* Send command to SCU */ + ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG); + if (ret) + return ret; + + for (i = 0; i < LSS_MAX_DEVS; i++) { + for (j = 0; j < LSS_MAX_SHARED_DEVS; j++) + pwr->lss[i][j].state = PCI_D3hot; + } + + return 0; +} + +static const struct mid_pwr_device_info tng_info = { + .set_initial_state = tng_set_initial_state, +}; + +static const struct pci_device_id mid_pwr_pci_ids[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info }, + {} +}; +MODULE_DEVICE_TABLE(pci, mid_pwr_pci_ids); + +static struct pci_driver mid_pwr_pci_driver = { + .name = "intel_mid_pwr", + .probe = mid_pwr_probe, + .id_table = mid_pwr_pci_ids, +}; + +builtin_pci_driver(mid_pwr_pci_driver); diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 1fa6925733d3..8db5079f09a7 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -51,6 +51,9 @@ obj-$(CONFIG_ACPI) += pci-acpi.o # SMBIOS provided firmware instance and labels obj-$(CONFIG_PCI_LABEL) += pci-label.o +# Intel MID platform PM support +obj-$(CONFIG_X86_INTEL_MID) += pci-mid.o + obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c new file mode 100644 index 000000000000..c878aa71173b --- /dev/null +++ b/drivers/pci/pci-mid.c @@ -0,0 +1,77 @@ +/* + * Intel MID platform PM support + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +#include "pci.h" + +static bool mid_pci_power_manageable(struct pci_dev *dev) +{ + return true; +} + +static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) +{ + return intel_mid_pci_set_power_state(pdev, state); +} + +static pci_power_t mid_pci_choose_state(struct pci_dev *pdev) +{ + return PCI_D3hot; +} + +static int mid_pci_sleep_wake(struct pci_dev *dev, bool enable) +{ + return 0; +} + +static int mid_pci_run_wake(struct pci_dev *dev, bool enable) +{ + return 0; +} + +static bool mid_pci_need_resume(struct pci_dev *dev) +{ + return false; +} + +static struct pci_platform_pm_ops mid_pci_platform_pm = { + .is_manageable = mid_pci_power_manageable, + .set_state = mid_pci_set_power_state, + .choose_state = mid_pci_choose_state, + .sleep_wake = mid_pci_sleep_wake, + .run_wake = mid_pci_run_wake, + .need_resume = mid_pci_need_resume, +}; + +#define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } + +static const struct x86_cpu_id lpss_cpu_ids[] = { + ICPU(INTEL_FAM6_ATOM_MERRIFIELD1), + {} +}; + +static int __init mid_pci_init(void) +{ + const struct x86_cpu_id *id; + + id = x86_match_cpu(lpss_cpu_ids); + if (id) + pci_set_platform_pm(&mid_pci_platform_pm); + return 0; +} +arch_initcall(mid_pci_init); -- cgit v1.2.3 From 00688272157d83e48d1369d7d11c479571324e40 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Jun 2016 12:48:53 +0300 Subject: x86/platform/intel-mid: Enable GPIO expanders on Edison Intel Edison board provides GPIO expanders connected to I2C bus. Add necessary file to get those enumerated. 
Signed-off-by: Andy Shevchenko Cc: Dan O'Donovan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465984133-41639-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 6 +- .../intel-mid/device_libs/platform_pcal9555a.c | 99 ++++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 91ec9f8704bf..abe8ba87c970 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -11,11 +11,13 @@ obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o -obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o obj-$(subst m,y,$(CONFIG_INPUT_MPU3050)) += platform_mpu3050.o obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o -obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o +# I2C GPIO Expanders +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o +obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c new file mode 100644 index 000000000000..429a94192671 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c @@ -0,0 +1,99 @@ +/* + * PCAL9555a platform data initilization file + * + * Copyright (C) 2016, Intel Corporation + * + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#define PCAL9555A_NUM 4 + +static struct pca953x_platform_data pcal9555a_pdata[PCAL9555A_NUM]; +static int nr; + +static void __init *pcal9555a_platform_data(void *info) +{ + struct i2c_board_info *i2c_info = info; + char *type = i2c_info->type; + struct pca953x_platform_data *pcal9555a; + char base_pin_name[SFI_NAME_LEN + 1]; + char intr_pin_name[SFI_NAME_LEN + 1]; + int gpio_base, intr; + + snprintf(base_pin_name, sizeof(base_pin_name), "%s_base", type); + snprintf(intr_pin_name, sizeof(intr_pin_name), "%s_int", type); + + gpio_base = get_gpio_by_name(base_pin_name); + intr = get_gpio_by_name(intr_pin_name); + + /* Check if the SFI record valid */ + if (gpio_base == -1) + return NULL; + + if (nr >= PCAL9555A_NUM) { + pr_err("%s: Too many instances, only %d supported\n", __func__, + PCAL9555A_NUM); + return NULL; + } + + pcal9555a = &pcal9555a_pdata[nr++]; + pcal9555a->gpio_base = gpio_base; + + if (intr >= 0) { + i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; + pcal9555a->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; + } else { + i2c_info->irq = -1; + pcal9555a->irq_base = -1; + } + + strcpy(type, "pcal9555a"); + return pcal9555a; +} + +static const struct devs_id pcal9555a_1_dev_id __initconst = { + .name = "pcal9555a-1", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_2_dev_id __initconst = { + .name = "pcal9555a-2", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_3_dev_id __initconst = { + .name = "pcal9555a-3", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +static const struct devs_id pcal9555a_4_dev_id __initconst = { + .name = "pcal9555a-4", + .type = SFI_DEV_TYPE_I2C, + .delay = 1, + .get_platform_data = &pcal9555a_platform_data, +}; + +sfi_device(pcal9555a_1_dev_id); +sfi_device(pcal9555a_2_dev_id); +sfi_device(pcal9555a_3_dev_id); +sfi_device(pcal9555a_4_dev_id); -- cgit v1.2.3 From 5fc39d347267bd029fcc9099c70e2fe2d53130e9 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 15 Jun 2016 13:20:19 +0200 Subject: ARM: sunxi/dt: make the CHIP inherit from allwinner,sun5i-a13 The sun4i-timer driver registers its sched_clock only if the machine is compatible with "allwinner,sun5i-a13", "allwinner,sun5i-a10s" or "allwinner,sun4i-a10". Add the missing "allwinner,sun5i-a13" string to the machine compatible. 
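A minimal sketch of the gate this addresses, assuming the sun4i-timer driver structure implied by the description above (the sun4i_timer_sched_read() name stands in for the driver's counter read callback and is not copied from that driver):

/*
 * Sketch only: without "allwinner,sun5i-a13" in the board compatible
 * list none of these strings match on the C.H.I.P., so the driver
 * never calls sched_clock_register().
 */
static void __init sun4i_sched_clock_init(unsigned long rate)
{
	if (of_machine_is_compatible("allwinner,sun5i-a13") ||
	    of_machine_is_compatible("allwinner,sun5i-a10s") ||
	    of_machine_is_compatible("allwinner,sun4i-a10"))
		sched_clock_register(sun4i_timer_sched_read, 32, rate);
}
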
Signed-off-by: Boris Brezillon Fixes: 465a225fb2af ("ARM: sun5i: Add C.H.I.P DTS") Cc: Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun5i-r8-chip.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun5i-r8-chip.dts b/arch/arm/boot/dts/sun5i-r8-chip.dts index a8d8b4582397..f694482bdeb6 100644 --- a/arch/arm/boot/dts/sun5i-r8-chip.dts +++ b/arch/arm/boot/dts/sun5i-r8-chip.dts @@ -52,7 +52,7 @@ / { model = "NextThing C.H.I.P."; - compatible = "nextthing,chip", "allwinner,sun5i-r8"; + compatible = "nextthing,chip", "allwinner,sun5i-r8", "allwinner,sun5i-a13"; aliases { i2c0 = &i2c0; -- cgit v1.2.3 From 1f51dee7ca7424be6f84067395166f878dbdd8be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:10 +0200 Subject: locking/atomic, arch/alpha: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 65 +++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 572b228c44c7..8243f17999e3 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -65,6 +65,25 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldl_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stl_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + #define ATOMIC64_OP(op, asm_op) \ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ { \ @@ -101,11 +120,32 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, asm_op) \ +static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +{ \ + long temp, result; \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldq_l %2,%1\n" \ + " " #asm_op " %2,%3,%0\n" \ + " stq_c %0,%1\n" \ + " beq %0,2f\n" \ + ".subsection 2\n" \ + "2: br 1b\n" \ + ".previous" \ + :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ + :"Ir" (i), "m" (v->counter) : "memory"); \ + smp_mb(); \ + return result; \ +} + #define ATOMIC_OPS(op) \ ATOMIC_OP(op, op##l) \ ATOMIC_OP_RETURN(op, op##l) \ + ATOMIC_FETCH_OP(op, op##l) \ ATOMIC64_OP(op, op##q) \ - ATOMIC64_OP_RETURN(op, op##q) + ATOMIC64_OP_RETURN(op, op##q) \ + ATOMIC64_FETCH_OP(op, op##q) ATOMIC_OPS(add) ATOMIC_OPS(sub) @@ -113,18 +153,25 @@ ATOMIC_OPS(sub) #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) 
-ATOMIC_OP(or, bis) -ATOMIC_OP(xor, xor) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, bis) -ATOMIC64_OP(xor, xor) +#define atomic_fetch_or atomic_fetch_or + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm) \ + ATOMIC_OP(op, asm) \ + ATOMIC_FETCH_OP(op, asm) \ + ATOMIC64_OP(op, asm) \ + ATOMIC64_FETCH_OP(op, asm) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, bis) +ATOMIC_OPS(xor, xor) #undef ATOMIC_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From fbffe892e5253dd02c016c59a9d792eafe9d53e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:09 +0200 Subject: locking/atomic, arch/arc: Implement atomic_fetch_{add,sub,and,andnot,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vineet Gupta Cc: Andrew Morton Cc: Linus Torvalds Cc: Noam Camus Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/atomic.h | 103 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index dd683995bc9d..c066a21caaaf 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -67,6 +67,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int val, orig; \ + SCOND_FAIL_RETRY_VAR_DEF \ + \ + /* \ + * Explicit full memory barrier needed before/after as \ + * LLOCK/SCOND thmeselves don't provide any such semantics \ + */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + "1: llock %[orig], [%[ctr]] \n" \ + " " #asm_op " %[val], %[orig], %[i] \n" \ + " scond %[val], [%[ctr]] \n" \ + " \n" \ + SCOND_FAIL_RETRY_ASM \ + \ + : [val] "=&r" (val), \ + [orig] "=&r" (orig) \ + SCOND_FAIL_RETRY_VARS \ + : [ctr] "r" (&v->counter), \ + [i] "ir" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return orig; \ +} + #else /* !CONFIG_ARC_HAS_LLSC */ #ifndef CONFIG_SMP @@ -129,21 +160,46 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + unsigned long orig; \ + \ + /* \ + * spin lock/unlock provides the needed smp_mb() before/after \ + */ \ + atomic_ops_lock(flags); \ + orig = v->counter; \ + v->counter c_op i; \ + atomic_ops_unlock(flags); \ + \ + return orig; \ +} + #endif /* !CONFIG_ARC_HAS_LLSC */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#define atomic_fetch_or atomic_fetch_or + +#undef 
ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #undef SCOND_FAIL_RETRY_VAR_DEF #undef SCOND_FAIL_RETRY_ASM @@ -208,22 +264,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned int temp = i; \ + \ + /* Explicit full memory barrier needed before/after */ \ + smp_mb(); \ + \ + __asm__ __volatile__( \ + " mov r2, %0\n" \ + " mov r3, %1\n" \ + " .word %2\n" \ + " mov %0, r2" \ + : "+r"(temp) \ + : "r"(&v->counter), "i"(asm_op) \ + : "r2", "r3", "memory"); \ + \ + smp_mb(); \ + \ + return temp; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3) #define atomic_sub(i, v) atomic_add(-(i), (v)) #define atomic_sub_return(i, v) atomic_add_return(-(i), (v)) -ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3) #define atomic_andnot(mask, v) atomic_and(~(mask), (v)) -ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) -ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) +ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3) +ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) #endif /* CONFIG_ARC_PLAT_EZNPS */ #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From 6da068c1beba684b2a0dbf43a07b0529edd9e959 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:10:52 +0200 Subject: locking/atomic, arch/arm: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/atomic.h | 108 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 9e10c4567eb4..0feb110ec542 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + int result, val; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic_fetch_" #op "\n" \ +"1: ldrex %0, [%4]\n" \ +" " #asm_op " %1, %0, %5\n" \ +" strex %2, %1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "Ir" (i) \ + : "cc"); \ + \ + return result; \ +} + #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new) { @@ -159,6 +187,22 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return val; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int val; \ + \ + raw_local_irq_save(flags); \ + val = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return val; \ +} + +#define atomic_fetch_or atomic_fetch_or + static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -187,19 +231,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -ATOMIC_OP(and, &=, and) -ATOMIC_OP(andnot, &= ~, bic) -ATOMIC_OP(or, |=, orr) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(andnot, &= ~, bic) +ATOMIC_OPS(or, |=, orr) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -317,24 +368,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, op1, op2) \ +static inline long long \ +atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +{ \ + long long result, val; \ + unsigned long tmp; \ + \ + prefetchw(&v->counter); \ + \ + __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \ +"1: ldrexd %0, %H0, [%4]\n" \ +" " #op1 " %Q1, %Q0, %Q5\n" \ +" " #op2 " %R1, %R0, %R5\n" \ +" strexd %2, %1, %H1, [%4]\n" \ +" teq %2, #0\n" \ +" bne 1b" \ + : "=&r" (result), "=&r" 
(val), "=&r" (tmp), "+Qo" (v->counter) \ + : "r" (&v->counter), "r" (i) \ + : "cc"); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, op1, op2) \ ATOMIC64_OP(op, op1, op2) \ - ATOMIC64_OP_RETURN(op, op1, op2) + ATOMIC64_OP_RETURN(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) ATOMIC64_OPS(add, adds, adc) ATOMIC64_OPS(sub, subs, sbc) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, op1, op2) \ + ATOMIC64_OP(op, op1, op2) \ + ATOMIC64_FETCH_OP(op, op1, op2) #define atomic64_andnot atomic64_andnot -ATOMIC64_OP(and, and, and) -ATOMIC64_OP(andnot, bic, bic) -ATOMIC64_OP(or, orr, orr) -ATOMIC64_OP(xor, eor, eor) +ATOMIC64_OPS(and, and, and) +ATOMIC64_OPS(andnot, bic, bic) +ATOMIC64_OPS(or, orr, orr) +ATOMIC64_OPS(xor, eor, eor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From e490f9b1d3b40ba32ad07432b63b813ce3052d41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:09 +0200 Subject: locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). [wildea01: compile fixes for ll/sc] Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Mark Rutland Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic.h | 62 +++++++++++++++++++ arch/arm64/include/asm/atomic_ll_sc.h | 110 ++++++++++++++++++++++++++-------- 2 files changed, 148 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586a421c..3128c3d7c1ff 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -98,6 +128,8 @@ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot +#define atomic_fetch_or atomic_fetch_or + /* * 64-bit atomic operations. 
*/ @@ -125,6 +157,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f6ba02..f819fdcff1ac 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(...) 
\ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From 6822a84dd4e35a1beb70028e46b5f60c14fc422d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Apr 2016 18:01:32 +0100 Subject: locking/atomic, arch/arm64: Generate LSE non-return cases using common macros atomic[64]_{add,and,andnot,or,xor} all follow the same patterns, so generate them using macros, like we do for the LL/SC case already. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1461344493-8262-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic_lse.h | 122 ++++++++++-------------------------- 1 file changed, 32 insertions(+), 90 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d340fec5..37a0f03560f7 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,25 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) 
\ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -167,54 +138,25 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} - -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC64_OP #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ -- cgit v1.2.3 From 2efe95fe695270ae1a225805f016303505972d86 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Apr 2016 18:01:33 +0100 Subject: locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() for LSE instructions Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). This patch implements the LSE variants. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Linus Torvalds Cc: Lorenzo Pieralisi Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1461344493-8262-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/atomic_lse.h | 172 ++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 37a0f03560f7..b5890be8f257 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -46,6 +46,38 @@ ATOMIC_OP(add, stadd) #undef ATOMIC_OP +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") + +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS + #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ { \ @@ -90,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -135,6 +194,33 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN + +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) 
\ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC_FETCH_OP_SUB #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) @@ -158,6 +244,38 @@ ATOMIC64_OP(add, stadd) #undef ATOMIC64_OP +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") + +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS + #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ { \ @@ -202,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -248,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) 
\ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; -- cgit v1.2.3 From 1a6eafacd4811cdc1b138faee858527658eee4e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/avr32: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Hans-Christian Noren Egtvedt Cc: Andrew Morton Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/avr32/include/asm/atomic.h | 56 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index d74fd8ce980a..b8681fd495ef 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -41,21 +41,51 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \ +static inline int __atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, val; \ + \ + asm volatile( \ + "/* atomic_fetch_" #op " */\n" \ + "1: ssrf 5\n" \ + " ld.w %0, %3\n" \ + " mov %1, %0\n" \ + " " #asm_op " %1, %4\n" \ + " stcond %2, %1\n" \ + " brne 1b" \ + : "=&r" (result), "=&r" (val), "=o" (v->counter) \ + : "m" (v->counter), #asm_con (i) \ + : "cc"); \ + \ + return result; \ +} + ATOMIC_OP_RETURN(sub, sub, rKs21) ATOMIC_OP_RETURN(add, add, r) +ATOMIC_FETCH_OP (sub, sub, rKs21) +ATOMIC_FETCH_OP (add, add, r) + +#define atomic_fetch_or atomic_fetch_or -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic_##op##_return(i, v); \ +} \ +ATOMIC_FETCH_OP(op, asm_op, r) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic_fetch_##op(i, v); \ } -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OP +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN /* @@ -87,6 +117,14 @@ static inline int atomic_add_return(int i, atomic_t *v) return __atomic_add_return(i, v); } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(-i, v); + + return __atomic_fetch_add(i, v); +} + /* * 
atomic_sub_return - subtract the atomic variable * @i: integer value to subtract @@ -102,6 +140,14 @@ static inline int atomic_sub_return(int i, atomic_t *v) return __atomic_add_return(-i, v); } +static inline int atomic_fetch_sub(int i, atomic_t *v) +{ + if (IS_21BIT_CONST(i)) + return __atomic_fetch_sub(i, v); + + return __atomic_fetch_add(-i, v); +} + /* * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t -- cgit v1.2.3 From e87fc0ec070554e34812be68267a9450271868d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/blackfin: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Miao Cc: Thomas Gleixner Cc: adi-buildroot-devel@lists.sourceforge.net Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/blackfin/include/asm/atomic.h | 8 +++++++ arch/blackfin/kernel/bfin_ksyms.c | 1 + arch/blackfin/mach-bf561/atomic.S | 43 +++++++++++++++++++++++++++----------- 3 files changed, 40 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 1c1c42330c99..63c7deceeeb6 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -17,6 +17,7 @@ asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr); asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value); +asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value); asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value); @@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) +#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i) +#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i)) + #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) +#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i) +#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i) +#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i) + #endif #include diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c index a401c27b69b4..68096e8f787f 100644 --- a/arch/blackfin/kernel/bfin_ksyms.c +++ b/arch/blackfin/kernel/bfin_ksyms.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16); #ifdef CONFIG_SMP EXPORT_SYMBOL(__raw_atomic_add_asm); +EXPORT_SYMBOL(__raw_atomic_xadd_asm); EXPORT_SYMBOL(__raw_atomic_and_asm); EXPORT_SYMBOL(__raw_atomic_or_asm); EXPORT_SYMBOL(__raw_atomic_xor_asm); diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S index 26fccb5568b9..1e2989c5d6b2 100644 --- a/arch/blackfin/mach-bf561/atomic.S 
+++ b/arch/blackfin/mach-bf561/atomic.S @@ -605,6 +605,28 @@ ENTRY(___raw_atomic_add_asm) rts; ENDPROC(___raw_atomic_add_asm) +/* + * r0 = ptr + * r1 = value + * + * ADD a signed value to a 32bit word and return the old value atomically. + * Clobbers: r3:0, p1:0 + */ +ENTRY(___raw_atomic_xadd_asm) + p1 = r0; + r3 = r1; + [--sp] = rets; + call _get_core_lock; + r3 = [p1]; + r2 = r3 + r2; + [p1] = r2; + r1 = p1; + call _put_core_lock; + r0 = r3; + rets = [sp++]; + rts; +ENDPROC(___raw_atomic_add_asm) + /* * r0 = ptr * r1 = mask @@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 & r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 & r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 | r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 | r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; @@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm) r3 = r1; [--sp] = rets; call _get_core_lock; - r2 = [p1]; - r3 = r2 ^ r3; - [p1] = r3; - r3 = r2; + r3 = [p1]; + r2 = r2 ^ r3; + [p1] = r2; r1 = p1; call _put_core_lock; r0 = r3; -- cgit v1.2.3 From d9c730281617e55ca470e66f8e9d7d3f5f420fec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/frv: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/frv/include/asm/atomic.h | 32 ++++++++++++-------------------- arch/frv/include/asm/atomic_defs.h | 2 ++ 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 64f02d451aa8..e3e06da0cd59 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return atomic_add_return(i, v) < 0; } -static inline void atomic_add(int i, atomic_t *v) -{ - atomic_add_return(i, v); -} - -static inline void atomic_sub(int i, atomic_t *v) -{ - atomic_sub_return(i, v); -} - static inline void atomic_inc(atomic_t *v) { atomic_inc_return(v); @@ -84,6 +74,8 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) +#define atomic_fetch_or atomic_fetch_or + /* * 64-bit atomic ops */ @@ -136,16 +128,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v) return atomic64_add_return(i, v) < 0; } -static inline void atomic64_add(long long i, atomic64_t *v) -{ - atomic64_add_return(i, v); -} - -static inline void atomic64_sub(long long i, atomic64_t *v) -{ - atomic64_sub_return(i, v); -} - static inline void atomic64_inc(atomic64_t *v) { atomic64_inc_return(v); @@ -182,11 +164,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) } #define ATOMIC_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __atomic32_fetch_##op(i, &v->counter); \ +} \ static inline void atomic_##op(int i, atomic_t *v) \ { \ (void)__atomic32_fetch_##op(i, &v->counter); \ } \ \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + return __atomic64_fetch_##op(i, &v->counter); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ (void)__atomic64_fetch_##op(i, &v->counter); \ @@ -195,6 +185,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) +ATOMIC_OP(add) +ATOMIC_OP(sub) #undef ATOMIC_OP diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h index 36e126d2f801..d4912c88b829 100644 --- a/arch/frv/include/asm/atomic_defs.h +++ b/arch/frv/include/asm/atomic_defs.h @@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op); ATOMIC_FETCH_OP(or) ATOMIC_FETCH_OP(and) ATOMIC_FETCH_OP(xor) +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) ATOMIC_OP_RETURN(add) ATOMIC_OP_RETURN(sub) -- cgit v1.2.3 From 0c074cbc33091dd69fe70ec27474d228c3184860 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:08 +0200 Subject: locking/atomic, arch/h8300: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: uclinux-h8-devel@lists.sourceforge.jp Signed-off-by: Ingo Molnar --- arch/h8300/include/asm/atomic.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 4435a445ae7e..0961b618bdde 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + h8300flags flags; \ + int ret; \ + \ + flags = arch_local_irq_save(); \ + ret = v->counter; \ + v->counter c_op i; \ + arch_local_irq_restore(flags); \ + return ret; \ +} + #define ATOMIC_OP(op, c_op) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ @@ -41,17 +54,23 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#define atomic_fetch_or atomic_fetch_or +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) +ATOMIC_OPS(add, +=) +ATOMIC_OPS(sub, -=) + +#undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define atomic_add(i, v) (void)atomic_add_return(i, v) #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -#define atomic_sub(i, v) (void)atomic_sub_return(i, v) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_inc_return(v) atomic_add_return(1, v) -- cgit v1.2.3 From 4be7dd393515615430a4d07ca1ffceaf2a331620 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: locking/atomic, arch/hexagon: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Richard Kuo Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-hexagon@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/hexagon/include/asm/atomic.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 55696c4100d4..07dbb3332b4a 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ ); \ } \ -#define ATOMIC_OP_RETURN(op) \ +#define ATOMIC_OP_RETURN(op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ int output; \ @@ -127,16 +127,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return output; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int output, val; \ + \ + __asm__ __volatile__ ( \ + "1: %0 = memw_locked(%2);\n" \ + " %1 = "#op "(%0,%3);\n" \ + " memw_locked(%2,P3)=%1;\n" \ + " if !P3 jump 1b;\n" \ + : "=&r" (output), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory", "p3" \ + ); \ + return output; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From cc102507fac75f9f4f37938f49d10c25e596a608 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: locking/atomic, arch/ia64: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Fenghua Yu Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/atomic.h | 130 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 8dfb5f6f6c35..f565ad376142 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \ return new; \ } -ATOMIC_OP(add, +) -ATOMIC_OP(sub, -) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) #define atomic_add_return(i,v) \ ({ \ @@ -69,14 +88,44 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -ATOMIC_OP(and, &) -ATOMIC_OP(or, |) -ATOMIC_OP(xor, ^) +#define atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) -#define atomic_and(i,v) (void)ia64_atomic_and(i,v) -#define atomic_or(i,v) (void)ia64_atomic_or(i,v) -#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v) +#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ @@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ return new; \ } -ATOMIC64_OP(add, +) -ATOMIC64_OP(sub, -) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ long \ +ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +{ \ + __s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ @@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -) : ia64_atomic64_sub(__ia64_asr_i, v); \ }) -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define atomic64_fetch_add(i,v) \ +({ \ + long __ia64_aar_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ + || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ + || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ + || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) + +#define atomic64_fetch_sub(i,v) \ +({ \ + long __ia64_asr_i = (i); \ + (__builtin_constant_p(i) \ + && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ + || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ + || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ + || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ + ? 
ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) -#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v) -#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v) -#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v) +#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new)) -- cgit v1.2.3 From f649370523033c7c2adf16a9d062438c8a7758b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:07 +0200 Subject: locking/atomic, arch/m32r: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/m32r/include/asm/atomic.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index ea35160d632b..8ba8a0ab5d5d 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -89,16 +89,46 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int result, val; \ + \ + local_irq_save(flags); \ + __asm__ __volatile__ ( \ + "# atomic_fetch_" #op " \n\t" \ + DCACHE_CLEAR("%0", "r4", "%2") \ + M32R_LOCK" %1, @%2; \n\t" \ + "mv %0, %1 \n\t" \ + #op " %1, %3; \n\t" \ + M32R_UNLOCK" %1, @%2; \n\t" \ + : "=&r" (result), "=&r" (val) \ + : "r" (&v->counter), "r" (i) \ + : "memory" \ + __ATOMIC_CLOBBER \ + ); \ + local_irq_restore(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From e39d88ea3ce4a471cd0202f4f2c8f5ee0f8d7f53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: locking/atomic, arch/m68k: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. 
This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Geert Uytterhoeven Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/atomic.h | 53 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 4858178260f9..5cf9b3b1b6ac 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -38,6 +38,13 @@ static inline void atomic_##op(int i, atomic_t *v) \ #ifdef CONFIG_RMW_INSNS +/* + * Am I reading these CAS loops right in that %2 is the old value and the first + * iteration uses an uninitialized value? + * + * Would it not make sense to add: tmp = atomic_read(v); to avoid this? + */ + #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ @@ -53,6 +60,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int t, tmp; \ + \ + __asm__ __volatile__( \ + "1: movel %2,%1\n" \ + " " #asm_op "l %3,%1\n" \ + " casl %2,%1,%0\n" \ + " jne 1b" \ + : "+m" (*v), "=&d" (t), "=&d" (tmp) \ + : "g" (i), "2" (atomic_read(v))); \ + return tmp; \ +} + #else #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ @@ -68,20 +90,43 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return t; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long flags; \ + int t; \ + \ + local_irq_save(flags); \ + t = v->counter; \ + v->counter c_op i; \ + local_irq_restore(flags); \ + \ + return t; \ +} + #endif /* CONFIG_RMW_INSNS */ #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, eor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, eor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From e898eb27ffd8b0ad6f4fd0b631559bc877c85444 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: locking/atomic, arch/metag: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: James Hogan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-metag@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/metag/include/asm/atomic.h | 2 ++ arch/metag/include/asm/atomic_lnkget.h | 36 ++++++++++++++++++++++++++++++---- arch/metag/include/asm/atomic_lock1.h | 33 +++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h index 470e365f04ea..6ca210de8a7d 100644 --- a/arch/metag/include/asm/atomic.h +++ b/arch/metag/include/asm/atomic.h @@ -17,6 +17,8 @@ #include #endif +#define atomic_fetch_or atomic_fetch_or + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 88fa25fae8bd..def2c642f053 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int result, temp; \ + \ + smp_mb(); \ + \ + asm volatile ( \ + "1: LNKGETD %1, [%2]\n" \ + " " #op " %0, %1, %3\n" \ + " LNKSETD [%2], %0\n" \ + " DEFR %0, TXSTAT\n" \ + " ANDT %0, %0, #HI(0x3f000000)\n" \ + " CMPT %0, #HI(0x02000000)\n" \ + " BNZ 1b\n" \ + : "=&d" (temp), "=&d" (result) \ + : "da" (&v->counter), "bd" (i) \ + : "cc"); \ + \ + smp_mb(); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 0295d9b8d5bf..6c1380a8a0d4 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return result; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long result; \ + unsigned long flags; \ + \ + __global_lock1(flags); \ + result = v->counter; \ + fence(); \ + v->counter c_op i; \ + __global_unlock1(flags); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From 4edac529eb629ccd598e2236c61762537f16e883 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:06 +0200 Subject: locking/atomic, arch/mips: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN 
primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/atomic.h | 138 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 129 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 835b402e4574..431079f8e483 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ " " #asm_op " %0, %2 \n" \ " sc %0, %1 \n" \ " .set mips0 \n" \ - : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \ : "Ir" (i)); \ } while (unlikely(!temp)); \ } else { \ @@ -130,18 +130,78 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op, c_op, asm_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + int result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + int temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + int temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " ll %1, %2 # atomic_fetch_" #op " \n" \ + " " #asm_op " %0, %1, %3 \n" \ + " sc %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ +} + #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ - ATOMIC_OP_RETURN(op, c_op, asm_op) + ATOMIC_OP_RETURN(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -ATOMIC_OP(and, &=, and) -ATOMIC_OP(or, |=, or) -ATOMIC_OP(xor, ^=, xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op, asm_op) \ + ATOMIC_OP(op, c_op, asm_op) \ + ATOMIC_FETCH_OP(op, c_op, asm_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=, and) +ATOMIC_OPS(or, |=, or) +ATOMIC_OPS(xor, ^=, xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -414,17 +474,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ return result; \ } +#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ +static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + smp_mb__before_llsc(); \ + \ + if (kernel_uses_llsc && R10000_LLSC_WAR) { \ + long temp; \ + \ + __asm__ __volatile__( \ + " .set arch=r4000 \n" \ + "1: lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " 
scd %0, %2 \n" \ + " beqzl %0, 1b \n" \ + " move %0, %1 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "+" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i)); \ + } else if (kernel_uses_llsc) { \ + long temp; \ + \ + do { \ + __asm__ __volatile__( \ + " .set "MIPS_ISA_LEVEL" \n" \ + " lld %1, %2 # atomic64_fetch_" #op "\n" \ + " " #asm_op " %0, %1, %3 \n" \ + " scd %0, %2 \n" \ + " .set mips0 \n" \ + : "=&r" (result), "=&r" (temp), \ + "=" GCC_OFF_SMALL_ASM() (v->counter) \ + : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \ + : "memory"); \ + } while (unlikely(!result)); \ + \ + result = temp; \ + } else { \ + unsigned long flags; \ + \ + raw_local_irq_save(flags); \ + result = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + } \ + \ + smp_llsc_mb(); \ + \ + return result; \ +} + #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ - ATOMIC64_OP_RETURN(op, c_op, asm_op) + ATOMIC64_OP_RETURN(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) -ATOMIC64_OP(and, &=, and) -ATOMIC64_OP(or, |=, or) -ATOMIC64_OP(xor, ^=, xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op, asm_op) \ + ATOMIC64_OP(op, c_op, asm_op) \ + ATOMIC64_FETCH_OP(op, c_op, asm_op) + +ATOMIC64_OPS(and, &=, and) +ATOMIC64_OPS(or, |=, or) +ATOMIC64_OPS(xor, ^=, xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From f8d638e28d7cc858066d2de484d9719dc181593a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/mn10300: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: David Howells Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-am33-list@redhat.com Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/mn10300/include/asm/atomic.h | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index ce318d5ab23b..3580f789f3a6 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -84,16 +84,43 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return retval; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int retval, status; \ + \ + asm volatile( \ + "1: mov %4,(_AAR,%3) \n" \ + " mov (_ADR,%3),%1 \n" \ + " mov %1,%0 \n" \ + " " #op " %5,%0 \n" \ + " mov %0,(_ADR,%3) \n" \ + " mov (_ADR,%3),%0 \n" /* flush */ \ + " mov (_ASR,%3),%0 \n" \ + " or %0,%0 \n" \ + " bne 1b \n" \ + : "=&r"(status), "=&r"(retval), "=m"(v->counter) \ + : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \ + : "memory", "cc"); \ + return retval; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From e5857a6ed6004cac5273b8cdc189ab4b6363cfaf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/parisc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Helge Deller Cc: James E.J. Bottomley Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-parisc@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/parisc/include/asm/atomic.h | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 1d109990a022..29df1f871910 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -121,16 +121,41 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ return ret; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long flags; \ + int ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -185,15 +210,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \ return ret; \ } -#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + unsigned long flags; \ + s64 ret; \ + \ + _atomic_spin_lock_irqsave(v, flags); \ + ret = v->counter; \ + v->counter c_op i; \ + _atomic_spin_unlock_irqrestore(v, flags); \ + \ + return ret; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_OP_RETURN(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) -ATOMIC64_OP(and, &=) -ATOMIC64_OP(or, |=) -ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &=) +ATOMIC64_OPS(or, |=) +ATOMIC64_OPS(xor, ^=) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.2.3 From a28cc7bbe8e30ee573e1a27e704558f0862d8c6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/powerpc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}{,_relaxed,_acquire,_release}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Tested-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Linus Torvalds Cc: Michael Ellerman Cc: Paul E. 
McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/atomic.h | 83 ++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index ae0751ef8788..f08d567e0ca4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \ return t; \ } +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ +{ \ + int res, t; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ + PPC405_ERR77(0, %4) \ +" stwcx. %1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) + ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -ATOMIC_OP(and, and) -ATOMIC_OP(or, or) -ATOMIC_OP(xor, xor) - #define atomic_add_return_relaxed atomic_add_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC_OPS(and, and) +ATOMIC_OPS(or, or) +ATOMIC_OPS(xor, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP_RELAXED #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP @@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ return t; \ } +#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ +static inline long \ +atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +{ \ + long res, t; \ + \ + __asm__ __volatile__( \ +"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ + #asm_op " %1,%3,%0\n" \ +" stdcx. 
%1,0,%4\n" \ +" bne- 1b\n" \ + : "=&r" (res), "=&r" (t), "+m" (v->counter) \ + : "r" (a), "r" (&v->counter) \ + : "cc"); \ + \ + return res; \ +} + #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN_RELAXED(op, asm_op) + ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) ATOMIC64_OPS(add, add) ATOMIC64_OPS(sub, subf) -ATOMIC64_OP(and, and) -ATOMIC64_OP(or, or) -ATOMIC64_OP(xor, xor) #define atomic64_add_return_relaxed atomic64_add_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_FETCH_OP_RELAXED(op, asm_op) + +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(or, or) +ATOMIC64_OPS(xor, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOPIC64_OPS +#undef ATOMIC64_FETCH_OP_RELAXED #undef ATOMIC64_OP_RETURN_RELAXED #undef ATOMIC64_OP -- cgit v1.2.3 From 56fefbbc3f13ad8cc9f502dbc6b5c9ddc8c4395e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:05 +0200 Subject: locking/atomic, arch/s390: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Martin Schwidefsky Cc: Andrew Morton Cc: Heiko Carstens Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-s390@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/atomic.h | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 911064aa59b2..2324e759b544 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v) return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i; } +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER); +} + static inline void atomic_add(int i, atomic_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -114,22 +119,29 @@ static inline void atomic_add(int i, atomic_t *v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) #define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v) #define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v) +#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v) #define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0) #define atomic_dec(_v) atomic_sub(1, _v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -#define ATOMIC_OP(op, OP) \ +#define ATOMIC_OPS(op, OP) \ static inline void atomic_##op(int i, atomic_t *v) \ { \ __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -ATOMIC_OP(and, AND) -ATOMIC_OP(or, OR) -ATOMIC_OP(xor, XOR) +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, AND) +ATOMIC_OPS(or, OR) +ATOMIC_OPS(xor, XOR) -#undef ATOMIC_OP +#undef ATOMIC_OPS #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) @@ -236,6 +248,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i; } +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER); +} + static inline void atomic64_add(long long i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES @@ -264,17 +281,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, return old; } -#define ATOMIC64_OP(op, OP) \ +#define ATOMIC64_OPS(op, OP) \ static inline void atomic64_##op(long i, atomic64_t *v) \ { \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ +} \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \ } -ATOMIC64_OP(and, AND) -ATOMIC64_OP(or, OR) -ATOMIC64_OP(xor, XOR) +ATOMIC64_OPS(and, AND) +ATOMIC64_OPS(or, OR) +ATOMIC64_OPS(xor, XOR) -#undef ATOMIC64_OP +#undef ATOMIC64_OPS #undef __ATOMIC64_LOOP static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u) @@ -315,6 +336,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_inc_return(_v) atomic64_add_return(1, _v) #define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) #define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v) #define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v) 
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) #define atomic64_dec(_v) atomic64_sub(1, _v) -- cgit v1.2.3 From 7d9794e7523798e1b9422ad9f4e4d808ae5d5932 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:04 +0200 Subject: locking/atomic, arch/sh: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rich Felker Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-sh@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/sh/include/asm/atomic-grb.h | 34 ++++++++++++++++++++++++++++++---- arch/sh/include/asm/atomic-irq.h | 31 +++++++++++++++++++++++++++---- arch/sh/include/asm/atomic-llsc.h | 32 ++++++++++++++++++++++++++++---- arch/sh/include/asm/atomic.h | 2 ++ 4 files changed, 87 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index b94df40e5f2d..d755e96c3064 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return tmp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int res, tmp; \ + \ + __asm__ __volatile__ ( \ + " .align 2 \n\t" \ + " mova 1f, r0 \n\t" /* r0 = end point */ \ + " mov r15, r1 \n\t" /* r1 = saved sp */ \ + " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \ + " mov.l @%2, %0 \n\t" /* load old value */ \ + " mov %0, %1 \n\t" /* save old value */ \ + " " #op " %3, %0 \n\t" /* $op */ \ + " mov.l %0, @%2 \n\t" /* store new value */ \ + "1: mov r1, r15 \n\t" /* LOGOUT */ \ + : "=&r" (tmp), "=&r" (res), "+r" (v) \ + : "r" (i) \ + : "memory" , "r0", "r1"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h index 23fcdad5773e..8e2da5fa0178 100644 --- a/arch/sh/include/asm/atomic-irq.h +++ b/arch/sh/include/asm/atomic-irq.h @@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long temp, flags; \ + \ + raw_local_irq_save(flags); \ + temp = v->counter; \ + v->counter c_op i; \ + raw_local_irq_restore(flags); \ + \ + return temp; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_OP_RETURN(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) ATOMIC_OPS(add, +=) ATOMIC_OPS(sub, -=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) #undef ATOMIC_OPS +#define 
ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(and, &=) +ATOMIC_OPS(or, |=) +ATOMIC_OPS(xor, ^=) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h index 33d34b16d4d6..caea2c45f6c2 100644 --- a/arch/sh/include/asm/atomic-llsc.h +++ b/arch/sh/include/asm/atomic-llsc.h @@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ return temp; \ } -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + unsigned long res, temp; \ + \ + __asm__ __volatile__ ( \ +"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \ +" mov %0, %1 \n" \ +" " #op " %2, %0 \n" \ +" movco.l %0, @%3 \n" \ +" bf 1b \n" \ +" synco \n" \ + : "=&z" (temp), "=&z" (res) \ + : "r" (i), "r" (&v->counter) \ + : "t"); \ + \ + return res; \ +} + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c399e1c55685..d93ed7ce1b2f 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,6 +25,8 @@ #include #endif +#define atomic_fetch_or atomic_fetch_or + #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) -- cgit v1.2.3 From 3a1adb23a52c920304239efff377d3bc967febc2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:04 +0200 Subject: locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: David S. Miller Cc: Andrew Morton Cc: James Y Knight Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: sparclinux@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/atomic.h | 1 + arch/sparc/include/asm/atomic_32.h | 15 ++++++++-- arch/sparc/include/asm/atomic_64.h | 16 +++++++--- arch/sparc/lib/atomic32.c | 29 ++++++++++-------- arch/sparc/lib/atomic_64.S | 61 +++++++++++++++++++++++++++++++------- arch/sparc/lib/ksyms.c | 17 ++++++++--- 6 files changed, 105 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h index 8ff83d8cc33f..1f741bcc73b7 100644 --- a/arch/sparc/include/asm/atomic.h +++ b/arch/sparc/include/asm/atomic.h @@ -5,4 +5,5 @@ #else #include #endif +#define atomic_fetch_or atomic_fetch_or #endif diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7dcbebbcaec6..5cfb20a599d9 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -20,9 +20,10 @@ #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); -void atomic_and(int, atomic_t *); -void atomic_or(int, atomic_t *); -void atomic_xor(int, atomic_t *); +int atomic_fetch_add(int, atomic_t *); +int atomic_fetch_and(int, atomic_t *); +int atomic_fetch_or(int, atomic_t *); +int atomic_fetch_xor(int, atomic_t *); int atomic_cmpxchg(atomic_t *, int, int); int atomic_xchg(atomic_t *, int); int __atomic_add_unless(atomic_t *, int, int); @@ -35,7 +36,15 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) +#define atomic_fetch_or atomic_fetch_or + +#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) +#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) +#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) + #define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v))) +#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v))) + #define atomic_inc_return(v) (atomic_add_return( 1, (v))) #define atomic_dec_return(v) (atomic_add_return( -1, (v))) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2fbf9e16faf..24827a3f733a 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *); int atomic_##op##_return(int, atomic_t *); \ long atomic64_##op##_return(long, atomic64_t *); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +int atomic_fetch_##op(int, atomic_t *); \ +long atomic64_fetch_##op(long, atomic64_t *); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index b9d63c0a7aab..2c373329d5cb 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy); #endif /* SMP */ -#define ATOMIC_OP_RETURN(op, c_op) \ -int atomic_##op##_return(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, c_op) \ +int atomic_fetch_##op(int i, atomic_t *v) \ { \ int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), 
flags); \ \ - ret = (v->counter c_op i); \ + ret = v->counter; \ + v->counter c_op i; \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ return ret; \ } \ -EXPORT_SYMBOL(atomic_##op##_return); +EXPORT_SYMBOL(atomic_fetch_##op); -#define ATOMIC_OP(op, c_op) \ -void atomic_##op(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, c_op) \ +int atomic_##op##_return(int i, atomic_t *v) \ { \ + int ret; \ unsigned long flags; \ spin_lock_irqsave(ATOMIC_HASH(v), flags); \ \ - v->counter c_op i; \ + ret = (v->counter c_op i); \ \ spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \ + return ret; \ } \ -EXPORT_SYMBOL(atomic_##op); +EXPORT_SYMBOL(atomic_##op##_return); ATOMIC_OP_RETURN(add, +=) -ATOMIC_OP(and, &=) -ATOMIC_OP(or, |=) -ATOMIC_OP(xor, ^=) +ATOMIC_FETCH_OP(add, +=) +ATOMIC_FETCH_OP(and, &=) +ATOMIC_FETCH_OP(or, |=) +ATOMIC_FETCH_OP(xor, ^=) + +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN -#undef ATOMIC_OP int atomic_xchg(atomic_t *v, int new) { diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index d6b0363f345b..a5c5a0279ccc 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -9,10 +9,11 @@ .text - /* Two versions of the atomic routines, one that + /* Three versions of the atomic routines, one that * does not return a value and does not perform - * memory barriers, and a second which returns - * a value and does the barriers. + * memory barriers, and a two which return + * a value, the new and old value resp. and does the + * barriers. */ #define ATOMIC_OP(op) \ @@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: lduw [%o1], %g1; \ + op %g1, %o0, %g7; \ + cas [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %icc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + sra %g1, 0, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \ 2: BACKOFF_SPIN(%o2, %o3, 1b); \ ENDPROC(atomic64_##op##_return); -#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) +#define ATOMIC64_FETCH_OP(op) \ +ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \ + BACKOFF_SETUP(%o2); \ +1: ldx [%o1], %g1; \ + op %g1, %o0, %g7; \ + casx [%o1], %g1, %g7; \ + cmp %g1, %g7; \ + bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \ + nop; \ + retl; \ + mov %g1, %o0; \ +2: BACKOFF_SPIN(%o2, %o3, 1b); \ +ENDPROC(atomic64_fetch_##op); + +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op) ATOMIC64_OPS(add) ATOMIC64_OPS(sub) -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) #undef ATOMIC64_OPS +#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op) + +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 
8eb454cfe05c..de5e97817bdb 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op); EXPORT_SYMBOL(atomic_##op##_return); \ EXPORT_SYMBOL(atomic64_##op##_return); -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_FETCH_OP(op) \ +EXPORT_SYMBOL(atomic_fetch_##op); \ +EXPORT_SYMBOL(atomic64_fetch_##op); + +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) #undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From 1af5de9af138941fb8638cf126293b16f3387de4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic.h | 4 ++ arch/tile/include/asm/atomic_32.h | 60 ++++++++++++++------ arch/tile/include/asm/atomic_64.h | 115 +++++++++++++++++++++++++------------- arch/tile/include/asm/bitops_32.h | 18 +++--- arch/tile/lib/atomic_32.c | 42 +++++++------- arch/tile/lib/atomic_asm_32.S | 14 ++--- 6 files changed, 159 insertions(+), 94 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..9807030557c4 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,10 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + +#define atomic_fetch_or atomic_fetch_or + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..da8eb4ed3752 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS -#undef ATOMIC_OP +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return 
_atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,17 +137,30 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long _atomic64_fetch_##op(long long *v, long long n); \ +static inline void atomic64_##op(long long i, atomic64_t *v) \ +{ \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } ATOMIC64_OP(and) ATOMIC64_OP(or) ATOMIC64_OP(xor) +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} + /** * atomic64_add_return - add integer and return * @v: pointer of type atomic64_t @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -248,10 +272,10 @@ extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. 
*/ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. 
*/ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } -static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) { long guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); } while (guess != oldval); - return oldval != u; -} - -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); + smp_mb(); + return oldval; } static inline void atomic64_xor(long i, atomic64_t *v) @@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v) } while (guess != oldval); } +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include /* Tile-specific routines to support . 
*/ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 298df1e9912a..5b6bd932c9c7 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -88,29 +88,29 @@ int _atomic_cmpxchg(int *v, int o, int n) } EXPORT_SYMBOL(_atomic_cmpxchg); -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_or((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_or); +EXPORT_SYMBOL(_atomic_fetch_or); -unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_and((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_and); +EXPORT_SYMBOL(_atomic_fetch_and); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_andn); 
+EXPORT_SYMBOL(_atomic_fetch_andn); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask) +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val; } -EXPORT_SYMBOL(_atomic_xor); +EXPORT_SYMBOL(_atomic_fetch_xor); long long _atomic64_xchg(long long *v, long long n) @@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n) } EXPORT_SYMBOL(_atomic64_cmpxchg); -long long _atomic64_and(long long *v, long long n) +long long _atomic64_fetch_and(long long *v, long long n) { - return __atomic64_and(v, __atomic_setup(v), n); + return __atomic64_fetch_and(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_and); +EXPORT_SYMBOL(_atomic64_fetch_and); -long long _atomic64_or(long long *v, long long n) +long long _atomic64_fetch_or(long long *v, long long n) { - return __atomic64_or(v, __atomic_setup(v), n); + return __atomic64_fetch_or(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_or); +EXPORT_SYMBOL(_atomic64_fetch_or); -long long _atomic64_xor(long long *v, long long n) +long long _atomic64_fetch_xor(long long *v, long long n) { - return __atomic64_xor(v, __atomic_setup(v), n); + return __atomic64_fetch_xor(v, __atomic_setup(v), n); } -EXPORT_SYMBOL(_atomic64_xor); +EXPORT_SYMBOL(_atomic64_fetch_xor); /* * If any of the atomic or futex routines hit a bad address (not in diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index f611265633d6..507abdd2bf9a 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -177,10 +177,10 @@ atomic_op _xchg, 32, "move r24, r2" atomic_op _xchg_add, 32, "add r24, r22, r2" atomic_op _xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _or, 32, "or r24, r22, r2" -atomic_op _and, 32, "and r24, r22, r2" -atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _xor, 32, "xor r24, r22, r2" +atomic_op _fetch_or, 32, "or r24, r22, r2" +atomic_op _fetch_and, 32, "and r24, r22, r2" +atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op _fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" @@ -192,9 +192,9 @@ atomic_op 64_xchg_add_unless, 64, \ { bbns r26, 3f; add r24, r22, r4 }; \ { bbns r27, 3f; add r25, r23, r5 }; \ slt_u r26, r24, r22; add r25, r25, r26" -atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" -atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" -atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" +atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }" +atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }" +atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }" jrp lr /* happy backtracer */ -- cgit v1.2.3 From a8bcccaba162632c3963259b8a442c6b490f4c68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). 
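The fetch-op semantics are easiest to see with a concrete value. Below is a minimal sketch, assuming a user-space C11 stdatomic stand-in rather than the kernel's atomic_t API; it only shows why the value held _before_ an irreversible OR is the useful one to return.

/* Minimal sketch, user-space C11 stdatomic (not the kernel API): the
 * old value returned by the fetch variant tells us whether the bit was
 * already set; the post-OR value cannot, since OR is irreversible.
 */
#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
	atomic_uint flags = 0x1;	/* bit 0 set, bit 1 clear */
	unsigned int old;

	old = atomic_fetch_or(&flags, 0x2);	/* fetch-op: returns old value */
	printf("bit 0x2 was %s set before the OR\n",
	       (old & 0x2) ? "already" : "not");

	/* An op-return style primitive would hand back (old | 0x2), in
	 * which bit 0x2 is always set, so the prior state is lost. */
	return 0;
}

This is the same pattern the tile bitops above rely on: test_and_set_bit() and test_and_clear_bit() recover the prior bit state from _atomic_fetch_or() and _atomic_fetch_andn().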
Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 37 ++++++++++++++++++++++++++++++++++--- arch/x86/include/asm/atomic64_32.h | 25 ++++++++++++++++++++++--- arch/x86/include/asm/atomic64_64.h | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 88 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e8674288198..73b8463b89e9 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v) #define atomic_inc_return(v) (atomic_add_return(1, v)) #define atomic_dec_return(v) (atomic_sub_return(1, v)) +static __always_inline int atomic_fetch_add(int i, atomic_t *v) +{ + return xadd(&v->counter, i); +} + +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +{ + return xadd(&v->counter, -i); +} + static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); @@ -190,10 +200,31 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#define ATOMIC_FETCH_OP(op, c_op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int old, val = atomic_read(v); \ + for (;;) { \ + old = atomic_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op) \ + ATOMIC_FETCH_OP(op, c_op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and, &) +ATOMIC_OPS(or , |) +ATOMIC_OPS(xor, ^) +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP /** diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index a984111135b1..71d7705fb303 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ c = old; \ } -ATOMIC64_OP(and, &) -ATOMIC64_OP(or, |) -ATOMIC64_OP(xor, ^) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ +{ \ + long long old, c = 0; \ + while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \ + c = old; \ + return old; \ +} + +ATOMIC64_FETCH_OP(add, +) + +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 037351022f54..70eed0e14553 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return atomic64_add_return(-i, v); } +static inline long atomic64_fetch_add(long i, atomic64_t *v) +{ + return xadd(&v->counter, i); +} + +static inline long atomic64_fetch_sub(long i, atomic64_t *v) +{ + return xadd(&v->counter, -i); +} + #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) @@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, 
atomic64_t *v) \ : "memory"); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +#define ATOMIC64_FETCH_OP(op, c_op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long old, val = atomic64_read(v); \ + for (;;) { \ + old = atomic64_cmpxchg(v, val, val c_op i); \ + if (old == val) \ + break; \ + val = old; \ + } \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(and, &) +ATOMIC64_OPS(or, |) +ATOMIC64_OPS(xor, ^) +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP #endif /* _ASM_X86_ATOMIC64_64_H */ -- cgit v1.2.3 From 6dc25876cdb17fd3906504dcabb9e537f8433000 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Zankel Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Signed-off-by: Ingo Molnar --- arch/xtensa/include/asm/atomic.h | 54 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index fd8017ce298a..d95a8aa1a6d3 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return result; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + __asm__ __volatile__( \ + "1: l32i %1, %3, 0\n" \ + " wsr %1, scompare1\n" \ + " " #op " %0, %1, %2\n" \ + " s32c1i %0, %3, 0\n" \ + " bne %0, %1, 1b\n" \ + : "=&a" (result), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "memory" \ + ); \ + \ + return result; \ +} + #else /* XCHAL_HAVE_S32C1I */ #define ATOMIC_OP(op) \ @@ -138,18 +158,44 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ return vval; \ } +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t * v) \ +{ \ + unsigned int tmp, vval; \ + \ + __asm__ __volatile__( \ + " rsil a15,"__stringify(TOPLEVEL)"\n" \ + " l32i %0, %3, 0\n" \ + " " #op " %1, %0, %2\n" \ + " s32i %1, %3, 0\n" \ + " wsr a15, ps\n" \ + " rsync\n" \ + : "=&a" (vval), "=&a" (tmp) \ + : "a" (i), "a" (v) \ + : "a15", "memory" \ + ); \ + \ + return vval; \ +} + #endif /* XCHAL_HAVE_S32C1I */ -#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_OPS(add) ATOMIC_OPS(sub) -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +#undef ATOMIC_OPS +#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) + +#define atomic_fetch_or atomic_fetch_or + +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -- cgit v1.2.3 From b53d6bedbe781974097fd8c38263f6cc78ff9ea7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 
00:58:25 +0200 Subject: locking/atomic: Remove linux/atomic.h:atomic_fetch_or() Since all architectures have this implemented now natively, remove this dead code. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 2 -- arch/arc/include/asm/atomic.h | 2 -- arch/arm/include/asm/atomic.h | 2 -- arch/arm64/include/asm/atomic.h | 2 -- arch/avr32/include/asm/atomic.h | 2 -- arch/frv/include/asm/atomic.h | 2 -- arch/h8300/include/asm/atomic.h | 2 -- arch/hexagon/include/asm/atomic.h | 2 -- arch/m32r/include/asm/atomic.h | 2 -- arch/m68k/include/asm/atomic.h | 2 -- arch/metag/include/asm/atomic.h | 2 -- arch/mips/include/asm/atomic.h | 2 -- arch/mn10300/include/asm/atomic.h | 2 -- arch/parisc/include/asm/atomic.h | 2 -- arch/s390/include/asm/atomic.h | 2 -- arch/sh/include/asm/atomic.h | 2 -- arch/sparc/include/asm/atomic.h | 1 - arch/sparc/include/asm/atomic_32.h | 2 -- arch/tile/include/asm/atomic.h | 2 -- arch/x86/include/asm/atomic.h | 2 -- arch/xtensa/include/asm/atomic.h | 2 -- include/asm-generic/atomic.h | 2 -- include/linux/atomic.h | 21 --------------------- 23 files changed, 64 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 8243f17999e3..5377ca8bb503 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -153,8 +153,6 @@ ATOMIC_OPS(sub) #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot -#define atomic_fetch_or atomic_fetch_or - #undef ATOMIC_OPS #define ATOMIC_OPS(op, asm) \ ATOMIC_OP(op, asm) \ diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index c066a21caaaf..bd9c51cb2bfd 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -189,8 +189,6 @@ ATOMIC_OPS(sub, -=, sub) #define atomic_andnot atomic_andnot -#define atomic_fetch_or atomic_fetch_or - #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 0feb110ec542..66d0e215a773 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -201,8 +201,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ return val; \ } -#define atomic_fetch_or atomic_fetch_or - static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 3128c3d7c1ff..c0235e0ff849 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -128,8 +128,6 @@ #define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) #define atomic_andnot atomic_andnot -#define atomic_fetch_or atomic_fetch_or - /* * 64-bit atomic operations. 
*/ diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index b8681fd495ef..3d5ce38a6f0b 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -66,8 +66,6 @@ ATOMIC_OP_RETURN(add, add, r) ATOMIC_FETCH_OP (sub, sub, rKs21) ATOMIC_FETCH_OP (add, add, r) -#define atomic_fetch_or atomic_fetch_or - #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP_RETURN(op, asm_op, r) \ static inline void atomic_##op(int i, atomic_t *v) \ diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index e3e06da0cd59..1c2a5e264fc7 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -74,8 +74,6 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) #define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0) -#define atomic_fetch_or atomic_fetch_or - /* * 64-bit atomic ops */ diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 0961b618bdde..349a47a918db 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -54,8 +54,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -#define atomic_fetch_or atomic_fetch_or - #define ATOMIC_OPS(op, c_op) \ ATOMIC_OP(op, c_op) \ ATOMIC_FETCH_OP(op, c_op) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 07dbb3332b4a..a62ba368b27d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -152,8 +152,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 8ba8a0ab5d5d..640cc1c7099f 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -121,8 +121,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 5cf9b3b1b6ac..3e03de7ae33b 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -119,8 +119,6 @@ ATOMIC_OPS(sub, -=, sub) ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, eor) diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h index 6ca210de8a7d..470e365f04ea 100644 --- a/arch/metag/include/asm/atomic.h +++ b/arch/metag/include/asm/atomic.h @@ -17,8 +17,6 @@ #include #endif -#define atomic_fetch_or atomic_fetch_or - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 431079f8e483..387ce288334e 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -194,8 +194,6 @@ ATOMIC_OPS(sub, -=, subu) ATOMIC_OP(op, c_op, asm_op) \ ATOMIC_FETCH_OP(op, c_op, asm_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 3580f789f3a6..36389efd45e8 100644 --- a/arch/mn10300/include/asm/atomic.h +++ 
b/arch/mn10300/include/asm/atomic.h @@ -113,8 +113,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 29df1f871910..5394b9c5f914 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -148,8 +148,6 @@ ATOMIC_OPS(sub, -=) ATOMIC_OP(op, c_op) \ ATOMIC_FETCH_OP(op, c_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &=) ATOMIC_OPS(or, |=) ATOMIC_OPS(xor, ^=) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 2324e759b544..d28cc2f5b7b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -135,8 +135,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \ } -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, AND) ATOMIC_OPS(or, OR) ATOMIC_OPS(xor, XOR) diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index d93ed7ce1b2f..c399e1c55685 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,8 +25,6 @@ #include #endif -#define atomic_fetch_or atomic_fetch_or - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h index 1f741bcc73b7..8ff83d8cc33f 100644 --- a/arch/sparc/include/asm/atomic.h +++ b/arch/sparc/include/asm/atomic.h @@ -5,5 +5,4 @@ #else #include #endif -#define atomic_fetch_or atomic_fetch_or #endif diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 5cfb20a599d9..ee3f11c43cda 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -36,8 +36,6 @@ void atomic_set(atomic_t *, int); #define atomic_inc(v) ((void)atomic_add_return( 1, (v))) #define atomic_dec(v) ((void)atomic_add_return( -1, (v))) -#define atomic_fetch_or atomic_fetch_or - #define atomic_and(i, v) ((void)atomic_fetch_and((i), (v))) #define atomic_or(i, v) ((void)atomic_fetch_or((i), (v))) #define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v))) diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9807030557c4..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -48,8 +48,6 @@ static inline int atomic_read(const atomic_t *v) #define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) -#define atomic_fetch_or atomic_fetch_or - /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 73b8463b89e9..a58b99811105 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -217,8 +217,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ATOMIC_OP(op) \ ATOMIC_FETCH_OP(op, c_op) -#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and, &) ATOMIC_OPS(or , |) ATOMIC_OPS(xor, ^) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index d95a8aa1a6d3..e7a23f2a519a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -188,8 +188,6 @@ ATOMIC_OPS(sub) #undef ATOMIC_OPS #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) 
-#define atomic_fetch_or atomic_fetch_or - ATOMIC_OPS(and) ATOMIC_OPS(or) ATOMIC_OPS(xor) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index a2304ccf4ed0..9ed8b987185b 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -137,8 +137,6 @@ ATOMIC_FETCH_OP(and, &) #endif #ifndef atomic_fetch_or -#define atomic_fetch_or atomic_fetch_or - ATOMIC_FETCH_OP(or, |) #endif diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 2e6c013ac5a4..0b3802d33125 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -573,27 +573,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -/** - * atomic_fetch_or - perform *p |= mask and return old value of *p - * @mask: mask to OR on the atomic_t - * @p: pointer to atomic_t - */ -#ifndef atomic_fetch_or -static inline int atomic_fetch_or(int mask, atomic_t *p) -{ - int old, val = atomic_read(p); - - for (;;) { - old = atomic_cmpxchg(p, val, val | mask); - if (old == val) - break; - val = old; - } - - return old; -} -#endif - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif -- cgit v1.2.3 From fe14d2f12d5e641f114e27c2ea1fb85843c58967 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:09:20 +0200 Subject: locking/atomic, arch/alpha: Convert to _relaxed atomics Generic code will construct {,_acquire,_release} versions by adding the required smp_mb__{before,after}_atomic() calls. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Richard Henderson Cc: Thomas Gleixner Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 5377ca8bb503..498933a7df97 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } \ #define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ +static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -61,15 +60,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } #define ATOMIC_FETCH_OP(op, asm_op) \ -static inline int atomic_fetch_##op(int i, atomic_t *v) \ +static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldl_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -80,7 +77,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } @@ -101,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op 
" %0,%3,%2\n" \ @@ -116,15 +111,13 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } #define ATOMIC64_FETCH_OP(op, asm_op) \ -static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ { \ long temp, result; \ - smp_mb(); \ __asm__ __volatile__( \ "1: ldq_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -135,7 +128,6 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_mb(); \ return result; \ } @@ -150,6 +142,16 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot @@ -165,6 +167,16 @@ ATOMIC_OPS(andnot, bic) ATOMIC_OPS(or, bis) ATOMIC_OPS(xor, xor) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOMIC_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN -- cgit v1.2.3 From 4ec45856b698c37e73d973fb4b1a094dfb9d5732 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:15:25 +0200 Subject: locking/atomic, arch/mips: Convert to _relaxed atomics Generic code will construct {,_acquire,_release} versions by adding the required smp_mb__{before,after}_atomic() calls. XXX if/when MIPS will start using their new SYNCxx instructions they can provide custom __atomic_op_{acquire,release}() macros as per the powerpc example. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Signed-off-by: Ingo Molnar --- arch/mips/include/asm/atomic.h | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 387ce288334e..0ab176bdb8e8 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \ } #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ -static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -125,18 +123,14 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ -static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ { \ int result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ int temp; \ \ @@ -176,8 +170,6 @@ static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } @@ -189,6 +181,11 @@ static __inline__ int atomic_fetch_##op(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #undef ATOMIC_OPS #define ATOMIC_OPS(op, c_op, asm_op) \ ATOMIC_OP(op, c_op, asm_op) \ @@ -198,6 +195,10 @@ ATOMIC_OPS(and, &=, and) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #undef ATOMIC_OPS #undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN @@ -420,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -467,18 +466,14 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } #define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ { \ long result; \ \ - smp_mb__before_llsc(); \ - \ if (kernel_uses_llsc && R10000_LLSC_WAR) { \ long temp; \ \ @@ -519,8 +514,6 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ raw_local_irq_restore(flags); \ } \ \ - smp_llsc_mb(); \ - \ return result; \ } @@ -532,6 +525,11 @@ static __inline__ long atomic64_fetch_##op(long i, atomic64_t * v) \ ATOMIC64_OPS(add, +=, daddu) ATOMIC64_OPS(sub, -=, dsubu) +#define 
atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, c_op, asm_op) \ ATOMIC64_OP(op, c_op, asm_op) \ @@ -541,6 +539,10 @@ ATOMIC64_OPS(and, &=, and) ATOMIC64_OPS(or, |=, or) ATOMIC64_OPS(xor, ^=, xor) +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #undef ATOMIC64_OPS #undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN -- cgit v1.2.3 From c5379ba8fccd99d5f99632c789f0393d84a57805 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 16 Jun 2016 15:42:25 +0200 Subject: ARM: mvebu: fix HW I/O coherency related deadlocks Until now, our understanding for HW I/O coherency to work on the Cortex-A9 based Marvell SoC was that only the PCIe regions should be mapped strongly-ordered. However, we were still encountering some deadlocks, especially when testing the CESA crypto engine. After checking with the HW designers, it was concluded that all the MMIO registers should be mapped as strongly ordered for the HW I/O coherency mechanism to work properly. This fixes some easy to reproduce deadlocks with the CESA crypto engine driver (dmcrypt on a sufficiently large disk partition). Tested-by: Terry Stockert Tested-by: Romain Perier Cc: Terry Stockert Cc: Romain Perier Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/coherency.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 7e989d61159c..474abff7e855 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -162,22 +162,16 @@ exit: } /* - * This ioremap hook is used on Armada 375/38x to ensure that PCIe - * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This - * is needed as a workaround for a deadlock issue between the PCIe - * interface and the cache controller. + * This ioremap hook is used on Armada 375/38x to ensure that all MMIO + * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is + * needed for the HW I/O coherency mechanism to work properly without + * deadlock. 
*/ static void __iomem * -armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - struct resource pcie_mem; - - mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); - - if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) - mtype = MT_UNCACHED; - + mtype = MT_UNCACHED; return __arm_ioremap_caller(phys_addr, size, mtype, caller); } @@ -186,7 +180,7 @@ static void __init armada_375_380_coherency_init(struct device_node *np) struct device_node *cache_dn; coherency_cpu_base = of_iomap(np, 0); - arch_ioremap_caller = armada_pcie_wa_ioremap_caller; + arch_ioremap_caller = armada_wa_ioremap_caller; /* * We should switch the PL310 to I/O coherency mode only if -- cgit v1.2.3 From 6a02734d420fca778554878d03017017537d92e1 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 16 Jun 2016 15:42:26 +0200 Subject: ARM: mvebu: map PCI I/O regions strongly ordered In order for HW I/O coherency to work on Cortex-A9 based Marvell SoCs, all MMIO registers must be mapped strongly ordered. In commit 1c8c3cf0b5239 ("ARM: 8060/1: mm: allow sub-architectures to override PCI I/O memory type") we implemented a new function, pci_ioremap_set_mem_type(), that allow sub-architecture code to override the memory type used to map PCI I/O regions. In the discussion around this patch series [1], Arnd Bergmann made the comment that maybe all PCI I/O regions should be mapped strongly-ordered, which would have made our proposal to add pci_ioremap_set_mem_type() irrelevant. So, we submitted a patch [2] that did what Arnd suggested. However, Russell in the end merged our initial proposal to add pci_ioremap_set_mem_type(), but it was never used anywhere. Further discussion with Arnd and other folks on IRC lead to the conclusion that in fact using strongly-ordered for all platforms was maybe not desirable, and therefore, using pci_ioremap_set_mem_type() was the most appropriate solution. As a consequence, this commit finally adds the pci_ioremap_set_mem_type() call in the mach-mvebu platform code, which was originally part of our initial patch series [3] and is necessary for the whole mechanism to work. 
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256565.html [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256755.html [3] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-May/256563.html Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/coherency.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 474abff7e855..e80f0dde2189 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -181,6 +181,7 @@ static void __init armada_375_380_coherency_init(struct device_node *np) coherency_cpu_base = of_iomap(np, 0); arch_ioremap_caller = armada_wa_ioremap_caller; + pci_ioremap_set_mem_type(MT_UNCACHED); /* * We should switch the PL310 to I/O coherency mode only if -- cgit v1.2.3 From 929e604efa3dc0522214e0dc18984be23993e9f0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 16 Jun 2016 15:42:27 +0200 Subject: ARM: dts: armada-38x: fix MBUS_ID for crypto SRAM on Armada 385 Linksys When the support for the Marvell crypto engine was added in the Device Tree of the various Armada 385 Device Tree files in commit d716f2e837ac6 ("ARM: mvebu: define crypto SRAM ranges for all armada-38x boards"), a typo was made in the MBus window attributes for the Armada 385 Linksys board: 0x09/0x05 are used instead of 0x19/0x15. This commit fixes this typo, which makes the CESA engines operational on Armada 385 Linksys boards. Reported-by: Terry Stockert Cc: Terry Stockert Cc: Imre Kaloz Cc: Boris Brezillon Cc: Fixes: d716f2e837ac6 ("ARM: mvebu: define crypto SRAM ranges for all armada-38x boards") Signed-off-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-385-linksys.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index 8450944b28e6..22f7a13e20b4 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -58,8 +58,8 @@ soc { ranges = ; + MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>; internal-regs { -- cgit v1.2.3 From 30ce0350381351646ef86b64e6d3840b3869833b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 17 Jun 2016 16:00:21 -0600 Subject: x86/PCI/VMD: Use untracked irq handler There is no way to know which device in a VMD triggered an interrupt without invoking every registered driver's actions. This uses the untracked irq handler so that a less used device does not trigger spurious interrupt. We have been previously recommending users to enable "noirqdebug", but do not want to force a system setting just to keep this domain functional. 
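The accounting problem described above comes from the fan-out: the VMD vector has to be demultiplexed by invoking every child handler, and for any given interrupt most of them report that it was not theirs. A minimal sketch of that per-interrupt miss count, assuming hypothetical names and plain C rather than the genirq API:

/* Sketch with hypothetical names (plain C, not the genirq API): one
 * hardware vector fans out to every registered child handler, and all
 * but one miss.  With a tracked flow handler those misses feed the
 * spurious-interrupt detector; handle_untracked_irq() skips that
 * accounting, which is what the patch above relies on.
 */
#include <stdio.h>

#define NCHILD 4

/* returns 1 if the interrupt belonged to this child device */
static int child_handler(int id, int active_id)
{
	return id == active_id;
}

int main(void)
{
	int active = 2;			/* only device 2 raised the interrupt */
	int handled = 0, missed = 0;

	for (int i = 0; i < NCHILD; i++) {
		if (child_handler(i, active))
			handled++;
		else
			missed++;
	}

	printf("per interrupt: handled=%d missed=%d\n", handled, missed);
	return 0;
}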
Signed-off-by: Keith Busch Acked-by: Bjorn Helgaas Cc: linux-pci@vger.kernel.org Cc: Jon Derrick Link: http://lkml.kernel.org/r/1466200821-29159-2-git-send-email-keith.busch@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/vmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index 7792aba266df..613cac7395c4 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -195,7 +195,7 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, vmdirq->virq = virq; irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, - vmdirq, handle_simple_irq, vmd, NULL); + vmdirq, handle_untracked_irq, vmd, NULL); return 0; } -- cgit v1.2.3 From a1141e0b5ca6ee3e5e35d5f1a310a5ecb9c96ce5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:05 -0700 Subject: x86/fpu/xstate: Define and use 'fpu_user_xstate_size' The kernel xstate area can be in standard or compacted format; it is always in standard format for user mode. When XSAVES is enabled, the kernel uses the compacted format and it is necessary to use a separate fpu_user_xstate_size for signal/ptrace frames. Signed-off-by: Fenghua Yu [ Rebased the patch and cleaned up the naming. ] Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/8756ec34dabddfc727cda5743195eb81e8caf91c.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 1 - arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/fpu/init.c | 5 ++- arch/x86/kernel/fpu/signal.c | 27 ++++++++++---- arch/x86/kernel/fpu/xstate.c | 76 ++++++++++++++++++++++++--------------- 5 files changed, 73 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 38951b0fcc5a..16df2c44ac66 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -39,7 +39,6 @@ #define REX_PREFIX #endif -extern unsigned int xstate_size; extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3cc5d3..0a16a16284f5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -368,6 +368,7 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; +extern unsigned int fpu_user_xstate_size; struct perf_event; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index aacfd7a82cec..5b1928c0aad4 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate_size based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. 
@@ -226,6 +226,9 @@ static void __init fpu__init_system_xstate_size_legacy(void) else xstate_size = sizeof(struct fregs_state); } + + fpu_user_xstate_size = xstate_size; + /* * Quirk: we don't yet handle the XSAVES* instructions * correctly, as we don't correctly convert between diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index c6f2a3cee2c2..0d29d4de4209 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -32,7 +32,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf, /* Check for the first magic field and other error scenarios. */ if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fpu_user_xstate_size || fx_sw->xstate_size > fx_sw->extended_size) return -1; @@ -89,7 +89,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) if (!use_xsave()) return err; - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + err |= __put_user(FP_XSTATE_MAGIC2, + (__u32 *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or @@ -126,7 +127,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) else err = copy_fregs_to_user((struct fregs_state __user *) buf); - if (unlikely(err) && __clear_user(buf, xstate_size)) + if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) err = -EFAULT; return err; } @@ -176,8 +177,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) copy_fxregs_to_kernel(&tsk->thread.fpu); } else { + /* + * It is a *bug* if kernel uses compacted-format for xsave + * area and we copy it out directly to a signal frame. It + * should have been handled above by saving the registers + * directly. + */ + if (boot_cpu_has(X86_FEATURE_XSAVES)) { + WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); + return -1; + } + fpstate_sanitize_xstate(&tsk->thread.fpu); - if (__copy_to_user(buf_fx, xsave, xstate_size)) + if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) return -1; } @@ -344,7 +356,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) static inline int xstate_sigframe_size(void) { - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; + return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : + fpu_user_xstate_size; } /* @@ -388,12 +401,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask; - fx_sw_reserved.xstate_size = xstate_size; + fx_sw_reserved.xstate_size = fpu_user_xstate_size; if (config_enabled(CONFIG_IA32_EMULATION) || config_enabled(CONFIG_X86_32)) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4ea2a59483c7..9c4da358ebb9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -43,6 +43,13 @@ static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... 
XFEATURE_MAX - 1] = -1}; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; +/* + * The XSAVE area of kernel can be in standard or compacted format; + * it is always in standard format for user mode. This is the user + * mode standard format size used for signal and ptrace frames. + */ +unsigned int fpu_user_xstate_size; + /* * Clear all of the X86_FEATURE_* bits that are unavailable * when the CPU has no XSAVE support. @@ -171,7 +178,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ while (xfeatures) { if (xfeatures & 0x1) { - int offset = xstate_offsets[feature_bit]; + int offset = xstate_comp_offsets[feature_bit]; int size = xstate_sizes[feature_bit]; memcpy((void *)fx + offset, @@ -533,8 +540,9 @@ static void do_extra_xstate_size_checks(void) XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); } + /* - * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + * Get total size of enabled xstates in XCR0/xfeatures_mask. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer @@ -544,34 +552,33 @@ static void do_extra_xstate_size_checks(void) * Note that we do not currently set any bits on IA32_XSS so * 'XCR0 | IA32_XSS == XCR0' for now. */ -static unsigned int __init calculate_xstate_size(void) +static unsigned int __init get_xsaves_size(void) { unsigned int eax, ebx, ecx, edx; - unsigned int calculated_xstate_size; + /* + * - CPUID function 0DH, sub-function 1: + * EBX enumerates the size (in bytes) required by + * the XSAVES instruction for an XSAVE area + * containing all the state components + * corresponding to bits currently set in + * XCR0 | IA32_XSS. + */ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + return ebx; +} - if (!boot_cpu_has(X86_FEATURE_XSAVES)) { - /* - * - CPUID function 0DH, sub-function 0: - * EBX enumerates the size (in bytes) required by - * the XSAVE instruction for an XSAVE area - * containing all the *user* state components - * corresponding to bits currently set in XCR0. - */ - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } else { - /* - * - CPUID function 0DH, sub-function 1: - * EBX enumerates the size (in bytes) required by - * the XSAVES instruction for an XSAVE area - * containing all the state components - * corresponding to bits currently set in - * XCR0 | IA32_XSS. - */ - cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - calculated_xstate_size = ebx; - } - return calculated_xstate_size; +static unsigned int __init get_xsave_size(void) +{ + unsigned int eax, ebx, ecx, edx; + /* + * - CPUID function 0DH, sub-function 0: + * EBX enumerates the size (in bytes) required by + * the XSAVE instruction for an XSAVE area + * containing all the *user* state components + * corresponding to bits currently set in XCR0. 
+ */ + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + return ebx; } /* @@ -591,7 +598,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size) static int init_xstate_size(void) { /* Recompute the context size for enabled features: */ - unsigned int possible_xstate_size = calculate_xstate_size(); + unsigned int possible_xstate_size; + unsigned int xsave_size; + + xsave_size = get_xsave_size(); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + possible_xstate_size = get_xsaves_size(); + else + possible_xstate_size = xsave_size; /* Ensure we have the space to store all enabled: */ if (!is_supported_xstate_size(possible_xstate_size)) @@ -603,6 +618,11 @@ static int init_xstate_size(void) */ xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); + + /* + * User space is always in standard format. + */ + fpu_user_xstate_size = xsave_size; return 0; } -- cgit v1.2.3 From bf15a8cf8d14879b785c548728415d36ccb6a33b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:06 -0700 Subject: x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size' User space uses standard format xsave area. fpstate in signal frame should have standard format size. To explicitly distinguish between xstate size in kernel space and the one in user space, we rename 'xstate_size' to 'fpu_kernel_xstate_size'. Cleanup only, no change in functionality. Signed-off-by: Fenghua Yu [ Rebased the patch and cleaned up the naming. ] Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2ecbae347a5152d94be52adf7d0f3b7305d90d99.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/fpu/core.c | 7 ++++--- arch/x86/kernel/fpu/init.c | 20 +++++++++++--------- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 8 ++++---- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0a16a16284f5..965c5d212c31 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -367,7 +367,7 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ -extern unsigned int xstate_size; +extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_user_xstate_size; struct perf_event; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7d564742e499..c759bd01ec99 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,7 +227,7 @@ void fpstate_init(union fpregs_state *state) return; } - memset(state, 0, xstate_size); + memset(state, 0, fpu_kernel_xstate_size); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); @@ -252,7 +252,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * leak into the child task: */ if (use_eager_fpu()) - memset(&dst_fpu->state.xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * Save current FPU registers directly into the child @@ -271,7 +271,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) */ preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) 
{ - memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + memcpy(&src_fpu->state, &dst_fpu->state, + fpu_kernel_xstate_size); if (use_eager_fpu()) copy_kernel_to_fpregs(&src_fpu->state); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5b1928c0aad4..60f3839c5bfa 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void) * This is inherent to the XSAVE architecture which puts all state * components into a single, continuous memory block: */ -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); +unsigned int fpu_kernel_xstate_size; +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void) * Add back the dynamically-calculated register state * size. */ - task_size += xstate_size; + task_size += fpu_kernel_xstate_size; /* * We dynamically size 'struct fpu', so we require that @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the user and kernel xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate sizes based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. @@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) on_boot_cpu = 0; /* - * Note that xstate_size might be overwriten later during + * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). */ @@ -219,15 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct swregs_state); + fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) - xstate_size = sizeof(struct fxregs_state); + fpu_kernel_xstate_size = + sizeof(struct fxregs_state); else - xstate_size = sizeof(struct fregs_state); + fpu_kernel_xstate_size = + sizeof(struct fregs_state); } - fpu_user_xstate_size = xstate_size; + fpu_user_xstate_size = fpu_kernel_xstate_size; /* * Quirk: we don't yet handle the XSAVES* instructions diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0d29d4de4209..06d80f62c03f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -263,7 +263,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; + int state_size = fpu_kernel_xstate_size; u64 xfeatures = 0; int fx_only = 0; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9c4da358ebb9..46abfafe61c8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -537,7 +537,7 @@ static void do_extra_xstate_size_checks(void) */ paranoid_xstate_size += xfeature_size(i); } - XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); + XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); } @@ -616,7 +616,7 @@ static int init_xstate_size(void) * The size is OK, we are definitely going to use xsave, * make it known to the world that we need more space. 
*/ - xstate_size = possible_xstate_size; + fpu_kernel_xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); /* @@ -679,14 +679,14 @@ void __init fpu__init_system_xstate(void) return; } - update_regset_xstate_info(xstate_size, xfeatures_mask); + update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, - xstate_size, + fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); } -- cgit v1.2.3 From 7d9370607d28afd454775c623d5447603473a3c3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:07 -0700 Subject: x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization Keep init_fpstate.xsave.header.xfeatures as zero for init optimization. This is important for init optimization that is implemented in processor. If a bit corresponding to an xstate in xstate_bv is 0, it means the xstate is in init status and will not be read from memory to the processor during XRSTOR/XRSTORS instruction. This largely impacts context switch performance. Signed-off-by: Fenghua Yu Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2fb4ec7f18b76e8cda057a8c0038def74a9b8044.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 46abfafe61c8..dbfef1b7be7c 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -329,13 +329,11 @@ static void __init setup_init_fpu_buf(void) setup_xstate_features(); print_xstate_features(); - if (boot_cpu_has(X86_FEATURE_XSAVES)) { + if (boot_cpu_has(X86_FEATURE_XSAVES)) init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_fpstate.xsave.header.xfeatures = xfeatures_mask; - } /* - * Init all the features state with header_bv being 0x0 + * Init all the features state with header.xfeatures being 0x0 */ copy_kernel_to_xregs_booting(&init_fpstate.xsave); -- cgit v1.2.3 From 99aa22d0d8f70d9317727ab40c85b2ead740a6ca Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 20 May 2016 10:47:08 -0700 Subject: x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use XSAVES is a kernel instruction and uses a compacted format. When working with user space, the kernel should provide standard-format, non-supervisor state data. We cannot do __copy_to_user() from a compacted-format kernel xstate area to a signal frame. Dave Hansen proposes this method to simplify copy xstate directly to user. This patch is based on an earlier patch from Fenghua Yu Originally-from: Fenghua Yu Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. 
Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c36f419d525517d04209a28dd8e1e5af9000036e.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 1 + arch/x86/kernel/fpu/signal.c | 3 ++- arch/x86/kernel/fpu/xstate.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 16df2c44ac66..d812cf361282 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -47,5 +47,6 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); +int using_compacted_format(void); #endif diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 06d80f62c03f..8aa96cbb5dfb 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -169,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpregs_active()) { + if (fpregs_active() || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index dbfef1b7be7c..0b01f003df8b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -420,7 +420,7 @@ static int xfeature_size(int xfeature_nr) * that it is obvious which aspect of 'XSAVES' is being handled * by the calling code. 
*/ -static int using_compacted_format(void) +int using_compacted_format(void) { return boot_cpu_has(X86_FEATURE_XSAVES); } -- cgit v1.2.3 From 4aef66c8ae91d00affeeb24cfb176b53354ac969 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2016 17:02:01 +0200 Subject: locking/atomic, arch/arc: Fix build Resolve conflict between commits: fbffe892e525 ("locking/atomic, arch/arc: Implement atomic_fetch_{add,sub,and,andnot,or,xor}()") and: ed6aefed726a ("Revert "ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff"") Reported-by: Guenter Roeck Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Nigel Topham Cc: Noam Camus Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vineet Gupta Cc: linux-kernel@vger.kernel.org Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Ingo Molnar --- arch/arc/include/asm/atomic.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index bd9c51cb2bfd..4e3c1b6b0806 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -71,7 +71,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ static inline int atomic_fetch_##op(int i, atomic_t *v) \ { \ unsigned int val, orig; \ - SCOND_FAIL_RETRY_VAR_DEF \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -84,11 +83,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ " " #asm_op " %[val], %[orig], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ " \n" \ - SCOND_FAIL_RETRY_ASM \ - \ : [val] "=&r" (val), \ [orig] "=&r" (orig) \ - SCOND_FAIL_RETRY_VARS \ : [ctr] "r" (&v->counter), \ [i] "ir" (i) \ : "cc"); \ @@ -199,10 +195,6 @@ ATOMIC_OPS(andnot, &= ~, bic) ATOMIC_OPS(or, |=, or) ATOMIC_OPS(xor, ^=, xor) -#undef SCOND_FAIL_RETRY_VAR_DEF -#undef SCOND_FAIL_RETRY_ASM -#undef SCOND_FAIL_RETRY_VARS - #else /* CONFIG_ARC_PLAT_EZNPS */ static inline int atomic_read(const atomic_t *v) -- cgit v1.2.3 From 86a664d58f3ba2398a378dc9da6d4cfa737d2281 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jun 2016 17:05:38 +0200 Subject: locking/atomic, arch/m68k: Remove comment I misread the inline asm. It uses a rare construct to provide an input to a previously declared output to do the atomic_read(). Reported-by: Geert Uytterhoeven Signed-off-by: Peter Zijlstra (Intel) Cc: Andreas Schwab Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Ingo Molnar --- arch/m68k/include/asm/atomic.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 3e03de7ae33b..cf4c3a7b1a45 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -38,13 +38,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ #ifdef CONFIG_RMW_INSNS -/* - * Am I reading these CAS loops right in that %2 is the old value and the first - * iteration uses an uninitialized value? - * - * Would it not make sense to add: tmp = atomic_read(v); to avoid this? 
- */ - #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ static inline int atomic_##op##_return(int i, atomic_t *v) \ { \ -- cgit v1.2.3 From b3b630b26ae87a54e2f396b459aab0cd2286fc77 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Jun 2016 22:57:22 +0200 Subject: ARM: dts: sunxi: Add pll3 to simplefb nodes clocks lists Now that we've a clock node describing pll3 we must add it to the simplefb nodes clocks lists to avoid it getting turned off when simplefb is used. This fixes the screen going black when using simplefb. Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun4i-a10.dtsi | 21 ++++++++++++--------- arch/arm/boot/dts/sun5i-a10s.dtsi | 11 ++++++----- arch/arm/boot/dts/sun7i-a20.dtsi | 11 ++++++----- 3 files changed, 24 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index a03e56fb5dbc..ca58eb279d55 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -65,8 +65,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -74,8 +75,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -84,9 +86,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&ahb_gates 46>, <&dram_gates 25>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&ahb_gates 46>, + <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; @@ -94,8 +96,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>, <&ahb_gates 46>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>, + <&ahb_gates 46>, <&dram_gates 5>, <&dram_gates 25>, <&dram_gates 26>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index bddd0de88af6..367f33012493 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -65,8 +65,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>; status = "disabled"; }; @@ -74,7 +74,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>; status = "disabled"; }; @@ -82,8 +83,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, - <&ahb_gates 44>; + clocks = <&pll3>, 
<&pll5 1>, <&ahb_gates 34>, + <&ahb_gates 36>, <&ahb_gates 44>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index febdf4c72fb0..f480051c1f8a 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -67,8 +67,9 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-hdmi"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, - <&ahb_gates 44>, <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 43>, <&ahb_gates 44>, + <&dram_gates 26>; status = "disabled"; }; @@ -76,8 +77,8 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0"; - clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 44>, - <&dram_gates 26>; + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 36>, + <&ahb_gates 44>, <&dram_gates 26>; status = "disabled"; }; @@ -85,7 +86,7 @@ compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; allwinner,pipeline = "de_be0-lcd0-tve0"; - clocks = <&pll5 1>, + clocks = <&pll3>, <&pll5 1>, <&ahb_gates 34>, <&ahb_gates 36>, <&ahb_gates 44>, <&dram_gates 5>, <&dram_gates 26>; status = "disabled"; -- cgit v1.2.3 From b7271b9f3e18181559b96a610f4e42bdb04b07f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jun 2016 11:16:49 +0200 Subject: locking/atomic, arch/tile: Fix tilepro build The tilepro change wasn't ever compiled it seems (the 0day built bot also doesn't have a toolchain for it). Make it work. The thing that makes the patch bigger than desired is namespace collision with the C11 __atomic builtin functions. So rename the tilepro functions to __atomic32. Reported-by: Sudip Mukherjee Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Fixes: 1af5de9af138 ("locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()") Link: http://lkml.kernel.org/r/20160622091649.GB30154@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic_32.h | 24 ++++++++++++------------ arch/tile/include/asm/futex.h | 14 +++++++------- arch/tile/lib/atomic_32.c | 16 ++++++++-------- arch/tile/lib/atomic_asm_32.S | 21 +++++++++++++-------- 4 files changed, 40 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index da8eb4ed3752..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -143,15 +143,15 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ _atomic64_fetch_##op(&v->counter, i); \ } \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ { \ smp_mb(); \ return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) #undef ATOMIC64_OPS @@ -266,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user 
__atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define __futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 5b6bd932c9c7..f8128800dbf5 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v) int _atomic_xchg(int *v, int n) { - return __atomic_xchg(v, __atomic_setup(v), n).val; + return __atomic32_xchg(v, __atomic_setup(v), n).val; } EXPORT_SYMBOL(_atomic_xchg); int _atomic_xchg_add(int *v, int i) { - return __atomic_xchg_add(v, __atomic_setup(v), i).val; + return __atomic32_xchg_add(v, __atomic_setup(v), i).val; } EXPORT_SYMBOL(_atomic_xchg_add); @@ -78,37 +78,37 @@ int _atomic_xchg_add_unless(int *v, int a, int u) * to use the first argument consistently as the "old value" * in the assembly, as is done for _atomic_cmpxchg(). 
*/ - return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val; + return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val; } EXPORT_SYMBOL(_atomic_xchg_add_unless); int _atomic_cmpxchg(int *v, int o, int n) { - return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val; + return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val; } EXPORT_SYMBOL(_atomic_cmpxchg); unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_or((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_or); unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_and((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_and); unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_andn((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_andn); unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask) { - return __atomic_fetch_xor((int *)p, __atomic_setup(p), mask).val; + return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val; } EXPORT_SYMBOL(_atomic_fetch_xor); diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 507abdd2bf9a..1a70e6c0f259 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic) .endif .endm -atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" -atomic_op _xchg, 32, "move r24, r2" -atomic_op _xchg_add, 32, "add r24, r22, r2" -atomic_op _xchg_add_unless, 32, \ + +/* + * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions. + */ + +atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }" +atomic_op 32_xchg, 32, "move r24, r2" +atomic_op 32_xchg_add, 32, "add r24, r22, r2" +atomic_op 32_xchg_add_unless, 32, \ "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }" -atomic_op _fetch_or, 32, "or r24, r22, r2" -atomic_op _fetch_and, 32, "and r24, r22, r2" -atomic_op _fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" -atomic_op _fetch_xor, 32, "xor r24, r22, r2" +atomic_op 32_fetch_or, 32, "or r24, r22, r2" +atomic_op 32_fetch_and, 32, "and r24, r22, r2" +atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2" +atomic_op 32_fetch_xor, 32, "xor r24, r22, r2" atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \ { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }" -- cgit v1.2.3 From 5691e03593cb9f7abfee93f4581de8933f1d6630 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 May 2016 09:11:56 +0200 Subject: tty: frv, remove unused serial macros STD_COM_FLAGS needs not be defined as it is not used anywhere on frv. SERIAL_PORT_DFNS is defined to be empty. 8250 is aware of empty SERIAL_PORT_DFNS and does: #ifndef SERIAL_PORT_DFNS #define SERIAL_PORT_DFNS #endif So no need to define it on frv. 
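As an illustration (not taken from the patch itself): the fallback works because the 8250 core expands SERIAL_PORT_DFNS inside a port-table initializer, so an empty definition simply yields an empty table. A minimal stand-alone sketch of that pattern follows, using hypothetical EXAMPLE_* names in place of the real SERIAL_PORT_DFNS and 8250 table; the empty-table case relies on the same GCC zero-length-array extension the kernel build uses.

/*
 * Hypothetical sketch of the "arch may leave it undefined" pattern.
 * An arch header may or may not define EXAMPLE_PORT_DFNS; the driver
 * supplies an empty default, so an arch with no legacy ports (like frv
 * above) need not define it at all.
 */
#include <stdio.h>

struct example_port {
	unsigned int iobase;
	unsigned int irq;
};

/* Driver-side fallback, only used if no arch header defined it: */
#ifndef EXAMPLE_PORT_DFNS
#define EXAMPLE_PORT_DFNS	/* empty: no legacy ports */
#endif

/* With the empty default this is a zero-length array (GCC extension). */
static const struct example_port example_ports[] = {
	EXAMPLE_PORT_DFNS
};

int main(void)
{
	printf("%zu legacy ports\n",
	       sizeof(example_ports) / sizeof(example_ports[0]));
	return 0;
}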
Signed-off-by: Jiri Slaby Cc: David Howells Signed-off-by: Greg Kroah-Hartman --- arch/frv/include/asm/serial.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/frv/include/asm/serial.h b/arch/frv/include/asm/serial.h index bce0d0d07e60..614c6d76789a 100644 --- a/arch/frv/include/asm/serial.h +++ b/arch/frv/include/asm/serial.h @@ -12,7 +12,3 @@ * the base baud is derived from the clock speed and so is variable */ #define BASE_BAUD 0 - -#define STD_COM_FLAGS UPF_BOOT_AUTOCONF - -#define SERIAL_PORT_DFNS -- cgit v1.2.3 From 6137b7a62978915d76db69e17e25e8e0b1057254 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 9 May 2016 09:11:57 +0200 Subject: tty: stop defining STD_COM_FLAGS in drivers STD_COM_FLAGS is mostly a bad name for what the drivers thinks it is. Stop using it and pass the flags directly. cyclades defines it as 0, so we do not assign anything to freshly tty_port_init'ed structure. Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/xt2000/setup.c | 1 - drivers/tty/cyclades.c | 3 --- drivers/tty/serial/m32r_sio.c | 5 +---- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c index 5f4bd71971d6..4904c5c16918 100644 --- a/arch/xtensa/platforms/xt2000/setup.c +++ b/arch/xtensa/platforms/xt2000/setup.c @@ -113,7 +113,6 @@ void platform_heartbeat(void) } //#define RS_TABLE_SIZE 2 -//#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF|UPF_SKIP_TEST) #define _SERIAL_PORT(_base,_irq) \ { \ diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index fcc4962fa4c6..5e4fa9206861 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -93,8 +93,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch); #define SERIAL_XMIT_SIZE (min(PAGE_SIZE, 4096)) #endif -#define STD_COM_FLAGS (0) - /* firmware stuff */ #define ZL_MAX_BLOCKS 16 #define DRIVER_VERSION 0x02010203 @@ -3083,7 +3081,6 @@ static int cy_init_card(struct cyclades_card *cinfo) info->port.closing_wait = CLOSING_WAIT_DELAY; info->port.close_delay = 5 * HZ / 10; - info->port.flags = STD_COM_FLAGS; init_completion(&info->shutdown_wait); if (cy_is_Z(cinfo)) { diff --git a/drivers/tty/serial/m32r_sio.c b/drivers/tty/serial/m32r_sio.c index 68765f7c2645..1b01504cf306 100644 --- a/drivers/tty/serial/m32r_sio.c +++ b/drivers/tty/serial/m32r_sio.c @@ -51,9 +51,6 @@ #define PASS_LIMIT 256 -/* Standard COM flags */ -#define STD_COM_FLAGS (UPF_BOOT_AUTOCONF | UPF_SKIP_TEST) - static const struct { unsigned int port; unsigned int irq; @@ -892,7 +889,7 @@ static void __init m32r_sio_init_ports(void) up->port.iobase = old_serial_port[i].port; up->port.irq = irq_canonicalize(old_serial_port[i].irq); up->port.uartclk = BAUD_RATE * 16; - up->port.flags = STD_COM_FLAGS; + up->port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; up->port.membase = 0; up->port.iotype = 0; up->port.regshift = 0; -- cgit v1.2.3 From 65fe935dd2387a4faf15314c73f5e6d31ef0217e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 13 Jun 2016 15:10:02 -0700 Subject: x86/KASLR, x86/power: Remove x86 hibernation restrictions With the following fix: 70595b479ce1 ("x86/power/64: Fix crash whan the hibernation code passes control to the image kernel") ... there is no longer a problem with hibernation resuming a KASLR-booted kernel image, so remove the restriction. Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jonathan Corbet Cc: Len Brown Cc: Linus Torvalds Cc: Linux PM list Cc: Logan Gunthorpe Cc: Pavel Machek Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Yinghai Lu Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20160613221002.GA29719@www.outflux.net Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 10 ++++------ arch/x86/boot/compressed/kaslr.c | 7 ------- kernel/power/hibernate.c | 6 ------ 3 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..fa8c6d470ad2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1803,12 +1803,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. js= [HW,JOY] Analog joystick See Documentation/input/joystick.txt. - kaslr/nokaslr [X86] - Enable/disable kernel and module base offset ASLR - (Address Space Layout Randomization) if built into - the kernel. When CONFIG_HIBERNATION is selected, - kASLR is disabled by default. When kASLR is enabled, - hibernation will be disabled. + nokaslr [KNL] + When CONFIG_RANDOMIZE_BASE is set, this disables + kernel and module base offset ASLR (Address Space + Layout Randomization). keepinitrd [HW,ARM] diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index cfeb0259ed81..dff42177cb0c 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -471,17 +471,10 @@ unsigned char *choose_random_location(unsigned long input, unsigned long choice = output; unsigned long random_addr; -#ifdef CONFIG_HIBERNATION - if (!cmdline_find_option_bool("kaslr")) { - warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); - goto out; - } -#else if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); goto out; } -#endif boot_params->hdr.loadflags |= KASLR_FLAG; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index fca9254280ee..9021387c6ff4 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1154,11 +1154,6 @@ static int __init nohibernate_setup(char *str) return 1; } -static int __init kaslr_nohibernate_setup(char *str) -{ - return nohibernate_setup(str); -} - static int __init page_poison_nohibernate_setup(char *str) { #ifdef CONFIG_PAGE_POISONING_ZERO @@ -1182,5 +1177,4 @@ __setup("hibernate=", hibernate_setup); __setup("resumewait", resumewait_setup); __setup("resumedelay=", resumedelay_setup); __setup("nohibernate", nohibernate_setup); -__setup("kaslr", kaslr_nohibernate_setup); __setup("page_poison=", page_poison_nohibernate_setup); -- cgit v1.2.3 From 98f78525371b55ccd1c480207ce10296c72fa340 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:30 -0700 Subject: x86/boot: Refuse to build with data relocations The compressed kernel is built with -fPIC/-fPIE so that it can run in any location a bootloader happens to put it. However, since ELF relocation processing is not happening (and all the relocation information has already been stripped at link time), none of the code can use data relocations (e.g. static assignments of pointers). This is already noted in a warning comment at the top of misc.c, but this adds an explicit check for the condition during the linking stage to block any such bugs from appearing. If this was in place with the earlier bug in pagetable.c, the build would fail like this: ... 
CC arch/x86/boot/compressed/pagetable.o DATAREL arch/x86/boot/compressed/vmlinux error: arch/x86/boot/compressed/pagetable.o has data relocations! make[2]: *** [arch/x86/boot/compressed/vmlinux] Error 1 ... A clean build shows: ... CC arch/x86/boot/compressed/pagetable.o DATAREL arch/x86/boot/compressed/vmlinux LD arch/x86/boot/compressed/vmlinux ... Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f1356889204e..536ccfcc01c6 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +# The compressed kernel is built with -fPIC/-fPIE so that a boot loader +# can place it anywhere in memory and it will still run. However, since +# it is executed as-is without any ELF relocation processing performed +# (and has already had all relocation sections stripped from the binary), +# none of the code can use data relocations (e.g. static assignments of +# pointer values), since they will be meaningless at runtime. This check +# will refuse to link the vmlinux if any of these relocations are found. +quiet_cmd_check_data_rel = DATAREL $@ +define cmd_check_data_rel + for obj in $(filter %.o,$^); do \ + readelf -S $$obj | grep -qF .rel.local && { \ + echo "error: $$obj has data relocations!" >&2; \ + exit 1; \ + } || true; \ + done +endef + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE + $(call if_changed,check_data_rel) $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -- cgit v1.2.3 From 11fdf97a3cd1a5a27625f820ceb74e1caba4fd26 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:31 -0700 Subject: x86/KASLR: Clarify identity map interface This extracts the call to prepare_level4() into a top-level function that the user of the pagetable.c interface must call to initialize the new page tables. For clarity and to match the "finalize" function, it has been renamed to initialize_identity_maps(). This function also gains the initialization of mapping_info so we don't have to do it each time in add_identity_map(). Additionally add copyright notice to the top, to make it clear that the bulk of the pagetable.c code was written by Yinghai, and that I just added bugs later. :) Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. 
Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 3 +++ arch/x86/boot/compressed/misc.h | 3 +++ arch/x86/boot/compressed/pagetable.c | 26 ++++++++++++++++---------- 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index dff42177cb0c..54037c9f2def 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -478,6 +478,9 @@ unsigned char *choose_random_location(unsigned long input, boot_params->hdr.loadflags |= KASLR_FLAG; + /* Prepare to add new identity pagetables on demand. */ + initialize_identity_maps(); + /* Record the various known unsafe memory ranges. */ mem_avoid_init(input, input_size, output); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b6fec1ff10e4..09c4ddd02ac6 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -85,10 +85,13 @@ unsigned char *choose_random_location(unsigned long input_ptr, #endif #ifdef CONFIG_X86_64 +void initialize_identity_maps(void); void add_identity_map(unsigned long start, unsigned long size); void finalize_identity_maps(void); extern unsigned char _pgtable[]; #else +static inline void initialize_identity_maps(void) +{ } static inline void add_identity_map(unsigned long start, unsigned long size) { } static inline void finalize_identity_maps(void) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 34b95df14e69..6e31a6aac4d3 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -2,6 +2,9 @@ * This code is used on x86_64 to create page table identity mappings on * demand by building up a new set of page tables (or appending to the * existing ones), and then switching over to them when ready. + * + * Copyright (C) 2015-2016 Yinghai Lu + * Copyright (C) 2016 Kees Cook */ /* @@ -59,9 +62,21 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long level4p; +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Due to relocation, pointers must be assigned at run time not build time. + */ +static struct x86_mapping_info mapping_info = { + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, +}; + /* Locates and clears a region for a new top level page table. */ -static void prepare_level4(void) +void initialize_identity_maps(void) { + /* Init mapping_info with run-time function/buffer pointers. */ + mapping_info.alloc_pgt_page = alloc_pgt_page; + mapping_info.context = &pgt_data; + /* * It should be impossible for this not to already be true, * but since calling this a second time would rewind the other @@ -96,17 +111,8 @@ static void prepare_level4(void) */ void add_identity_map(unsigned long start, unsigned long size) { - struct x86_mapping_info mapping_info = { - .alloc_pgt_page = alloc_pgt_page, - .context = &pgt_data, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, - }; unsigned long end = start + size; - /* Make sure we have a top level page table ready to use. */ - if (!level4p) - prepare_level4(); - /* Align boundary to 2M. 
*/ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); -- cgit v1.2.3 From 8391c73c96f28d4e8c40fd401fd0c9c04391b44a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 25 May 2016 15:45:32 -0700 Subject: x86/KASLR: Randomize virtual address separately The current KASLR implementation randomizes the physical and virtual addresses of the kernel together (both are offset by the same amount). It calculates the delta of the physical address where vmlinux was linked to load and where it is finally loaded. If the delta is not equal to 0 (i.e. the kernel was relocated), relocation handling needs be done. On 64-bit, this patch randomizes both the physical address where kernel is decompressed and the virtual address where kernel text is mapped and will execute from. We now have two values being chosen, so the function arguments are reorganized to pass by pointer so they can be directly updated. Since relocation handling only depends on the virtual address, we must check the virtual delta, not the physical delta for processing kernel relocations. This also populates the page table for the new virtual address range. 32-bit does not support a separate virtual address, so it continues to use the physical offset for its virtual offset. Additionally updates the sanity checks done on the resulting kernel addresses since they are potentially separate now. [kees: rewrote changelog, limited virtual split to 64-bit only, update checks] [kees: fix CONFIG_RANDOMIZE_BASE=n boot failure] Signed-off-by: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++---------------- arch/x86/boot/compressed/misc.c | 49 ++++++++++++++++++++++++---------------- arch/x86/boot/compressed/misc.h | 22 ++++++++++-------- 3 files changed, 64 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 54037c9f2def..5550546916be 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -463,17 +463,20 @@ static unsigned long find_random_virt_addr(unsigned long minimum, * Since this function examines addresses much more numerically, * it takes the input and output pointers as 'unsigned long'. */ -unsigned char *choose_random_location(unsigned long input, - unsigned long input_size, - unsigned long output, - unsigned long output_size) +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - unsigned long choice = output; unsigned long random_addr; + /* By default, keep output position unchanged. */ + *virt_addr = *output; + if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); - goto out; + return; } boot_params->hdr.loadflags |= KASLR_FLAG; @@ -482,25 +485,25 @@ unsigned char *choose_random_location(unsigned long input, initialize_identity_maps(); /* Record the various known unsafe memory ranges. */ - mem_avoid_init(input, input_size, output); + mem_avoid_init(input, input_size, *output); /* Walk e820 and find a random address. 
*/ - random_addr = find_random_phys_addr(output, output_size); + random_addr = find_random_phys_addr(*output, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); - goto out; + } else { + /* Update the new physical address location. */ + if (*output != random_addr) { + add_identity_map(random_addr, output_size); + *output = random_addr; + } } - /* Always enforce the minimum. */ - if (random_addr < choice) - goto out; - - choice = random_addr; - - add_identity_map(choice, output_size); - /* This actually loads the identity pagetable on x86_64. */ finalize_identity_maps(); -out: - return (unsigned char *)choice; + + /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ + if (IS_ENABLED(CONFIG_X86_64)) + random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); + *virt_addr = random_addr; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f14db4e21654..b3c5a5f030ce 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -170,7 +170,8 @@ void __puthex(unsigned long value) } #if CONFIG_X86_NEED_RELOCS -static void handle_relocations(void *output, unsigned long output_len) +static void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { int *reloc; unsigned long delta, map, ptr; @@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len) * and where it was actually loaded. */ delta = min_addr - LOAD_PHYSICAL_ADDR; - if (!delta) { - debug_putstr("No relocation needed... "); - return; - } - debug_putstr("Performing relocations... "); /* * The kernel contains a table of relocation addresses. Those @@ -197,6 +193,20 @@ static void handle_relocations(void *output, unsigned long output_len) */ map = delta - __START_KERNEL_map; + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if KASLR has chosen a different starting address offset + * from __START_KERNEL_map. + */ + if (IS_ENABLED(CONFIG_X86_64)) + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + /* * Process relocations: 32 bit relocations first then 64 bit after. * Three sets of binary relocations are added to the end of the kernel @@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len) #endif } #else -static inline void handle_relocations(void *output, unsigned long output_len) +static inline void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { } #endif @@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned long output_len) { const unsigned long kernel_total_size = VO__end - VO__text; - unsigned char *output_orig = output; + unsigned long virt_addr = (unsigned long)output; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_random_location((unsigned long)input_data, input_len, - (unsigned long)output, - max(output_len, kernel_total_size)); + choose_random_location((unsigned long)input_data, input_len, + (unsigned long *)&output, + max(output_len, kernel_total_size), + &virt_addr); /* Validate memory location choices. 
*/ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) - error("Destination address inappropriately aligned"); + error("Destination physical address inappropriately aligned"); + if (virt_addr & (MIN_KERNEL_ALIGN - 1)) + error("Destination virtual address inappropriately aligned"); #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); @@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif #ifndef CONFIG_RELOCATABLE if ((unsigned long)output != LOAD_PHYSICAL_ADDR) - error("Wrong destination address"); + error("Destination address does not match LOAD_PHYSICAL_ADDR"); + if ((unsigned long)output != virt_addr) + error("Destination virtual address changed when not relocatable"); #endif debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); parse_elf(output); - /* - * 32-bit always performs relocations. 64-bit relocations are only - * needed if kASLR has chosen a different load address. - */ - if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) - handle_relocations(output, output_len); + handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 09c4ddd02ac6..1c8355eadbd1 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,20 +67,22 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size); +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr); /* cpuflags.c */ bool has_cpuflag(int flag); #else -static inline -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size) +static inline void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - return (unsigned char *)output_ptr; + /* No change from existing output location. */ + *virt_addr = *output; } #endif -- cgit v1.2.3 From ed9f007ee68478f6a50ec9971ade25a0129a5c0e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:33 -0700 Subject: x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G We want the physical address to be randomized anywhere between 16MB and the top of physical memory (up to 64TB). This patch exchanges the prior slots[] array for the new slot_areas[] array, and lifts the limitation of KERNEL_IMAGE_SIZE on the physical address offset for 64-bit. As before, process_e820_entry() walks memory and populates slot_areas[], splitting on any detected mem_avoid collisions. Finally, since the slots[] array and its associated functions are not needed any more, so they are removed. Based on earlier patches by Baoquan He. Originally-from: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. 
Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 27 +++++---- arch/x86/boot/compressed/kaslr.c | 115 +++++++++++++++++++++++---------------- 2 files changed, 85 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..770ae5259dff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1934,21 +1934,26 @@ config RANDOMIZE_BASE attempts relying on knowledge of the location of kernel code internals. - The kernel physical and virtual address can be randomized - from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that - using RANDOMIZE_BASE reduces the memory space available to - kernel modules from 1.5GB to 1GB.) + On 64-bit, the kernel physical and virtual addresses are + randomized separately. The physical address will be anywhere + between 16MB and the top of physical memory (up to 64TB). The + virtual address will be randomized from 16MB up to 1GB (9 bits + of entropy). Note that this also reduces the memory space + available to kernel modules from 1.5GB to 1GB. + + On 32-bit, the kernel physical and virtual addresses are + randomized together. They will be randomized from 16MB up to + 512MB (8 bits of entropy). Entropy is generated using the RDRAND instruction if it is supported. If RDTSC is supported, its value is mixed into the entropy pool as well. If neither RDRAND nor RDTSC are - supported, then entropy is read from the i8254 timer. - - Since the kernel is built using 2GB addressing, and - PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of - entropy is theoretically possible. Currently, with the - default value for PHYSICAL_ALIGN and due to page table - layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits. + supported, then entropy is read from the i8254 timer. The + usable entropy is limited by the kernel being built using + 2GB addressing, and that PHYSICAL_ALIGN must be at a + minimum of 2MB. As a result, only 10 bits of entropy are + theoretically possible, but the implementations are further + limited due to memory layouts. If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot time. To enable it, boot with "kaslr" on the kernel command diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 5550546916be..36e28112523a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -132,17 +132,6 @@ enum mem_avoid_index { static struct mem_vector mem_avoid[MEM_AVOID_MAX]; -static bool mem_contains(struct mem_vector *region, struct mem_vector *item) -{ - /* Item at least partially before region. */ - if (item->start < region->start) - return false; - /* Item at least partially after region. */ - if (item->start + item->size > region->start + region->size) - return false; - return true; -} - static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) { /* Item one is entirely before item two. */ @@ -319,8 +308,6 @@ static bool mem_avoid_overlap(struct mem_vector *img, return is_overlapping; } -static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; - struct slot_area { unsigned long addr; int num; @@ -351,36 +338,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size) } } -static void slots_append(unsigned long addr) -{ - /* Overflowing the slots list should be impossible. 
*/ - if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) - return; - - slots[slot_max++] = addr; -} - static unsigned long slots_fetch_random(void) { + unsigned long slot; + int i; + /* Handle case of no slots stored. */ if (slot_max == 0) return 0; - return slots[get_random_long("Physical") % slot_max]; + slot = get_random_long("Physical") % slot_max; + + for (i = 0; i < slot_area_index; i++) { + if (slot >= slot_areas[i].num) { + slot -= slot_areas[i].num; + continue; + } + return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; + } + + if (i == slot_area_index) + debug_putstr("slots_fetch_random() failed!?\n"); + return 0; } static void process_e820_entry(struct e820entry *entry, unsigned long minimum, unsigned long image_size) { - struct mem_vector region, img, overlap; + struct mem_vector region, overlap; + struct slot_area slot_area; + unsigned long start_orig; /* Skip non-RAM entries. */ if (entry->type != E820_RAM) return; - /* Ignore entries entirely above our maximum. */ - if (entry->addr >= KERNEL_IMAGE_SIZE) + /* On 32-bit, ignore entries entirely above our maximum. */ + if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) return; /* Ignore entries entirely below our minimum. */ @@ -390,31 +385,55 @@ static void process_e820_entry(struct e820entry *entry, region.start = entry->addr; region.size = entry->size; - /* Potentially raise address to minimum location. */ - if (region.start < minimum) - region.start = minimum; + /* Give up if slot area array is full. */ + while (slot_area_index < MAX_SLOT_AREA) { + start_orig = region.start; - /* Potentially raise address to meet alignment requirements. */ - region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; - /* Did we raise the address above the bounds of this e820 region? */ - if (region.start > entry->addr + entry->size) - return; + /* Potentially raise address to meet alignment needs. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; + /* Did we raise the address above this e820 region? */ + if (region.start > entry->addr + entry->size) + return; - /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > KERNEL_IMAGE_SIZE) - region.size = KERNEL_IMAGE_SIZE - region.start; + /* Reduce size by any delta from the original address. */ + region.size -= region.start - start_orig; - /* Walk each aligned slot and check for avoided areas. */ - for (img.start = region.start, img.size = image_size ; - mem_contains(®ion, &img) ; - img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img, &overlap)) - continue; - slots_append(img.start); + /* On 32-bit, reduce region size to fit within max size. */ + if (IS_ENABLED(CONFIG_X86_32) && + region.start + region.size > KERNEL_IMAGE_SIZE) + region.size = KERNEL_IMAGE_SIZE - region.start; + + /* Return if region can't contain decompressed kernel */ + if (region.size < image_size) + return; + + /* If nothing overlaps, store the region and return. */ + if (!mem_avoid_overlap(®ion, &overlap)) { + store_slot_info(®ion, image_size); + return; + } + + /* Store beginning of region if holds at least image_size. 
*/ + if (overlap.start > region.start + image_size) { + struct mem_vector beginning; + + beginning.start = region.start; + beginning.size = overlap.start - region.start; + store_slot_info(&beginning, image_size); + } + + /* Return if overlap extends to or past end of region. */ + if (overlap.start + overlap.size >= region.start + region.size) + return; + + /* Clip off the overlapping region and start over. */ + region.size -= overlap.start - region.start + overlap.size; + region.start = overlap.start + overlap.size; } } @@ -431,6 +450,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum, for (i = 0; i < boot_params->e820_entries; i++) { process_e820_entry(&boot_params->e820_map[i], minimum, image_size); + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820 scan (slot_areas full)!\n"); + break; + } } return slots_fetch_random(); -- cgit v1.2.3 From e066cc47776a89bbdaf4184c0e75f7d389f9ab48 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 May 2016 15:45:34 -0700 Subject: x86/KASLR: Allow randomization below the load address Currently the kernel image physical address randomization's lower boundary is the original kernel load address. For bootloaders that load kernels into very high memory (e.g. kexec), this means randomization takes place in a very small window at the top of memory, ignoring the large region of physical memory below the load address. Since mem_avoid[] is already correctly tracking the regions that must be avoided, this patch changes the minimum address to whatever is less: 512M (to conservatively avoid unknown things in lower memory) or the load address. Now, for example, if the kernel is loaded at 8G, [512M, 8G) will be added to the list of possible physical memory positions. Signed-off-by: Yinghai Lu [ Rewrote the changelog, refactored the code to use min(). ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464216334-17200-6-git-send-email-keescook@chromium.org [ Edited the changelog some more, plus the code comment as well. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 36e28112523a..749c9e00c674 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -492,7 +492,7 @@ void choose_random_location(unsigned long input, unsigned long output_size, unsigned long *virt_addr) { - unsigned long random_addr; + unsigned long random_addr, min_addr; /* By default, keep output position unchanged. */ *virt_addr = *output; @@ -510,8 +510,15 @@ void choose_random_location(unsigned long input, /* Record the various known unsafe memory ranges. */ mem_avoid_init(input, input_size, *output); + /* + * Low end of the randomization range should be the + * smaller of 512M or the initial kernel image + * location: + */ + min_addr = min(*output, 512UL << 20); + /* Walk e820 and find a random address. 
*/ - random_addr = find_random_phys_addr(*output, output_size); + random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); } else { -- cgit v1.2.3 From f09509b9398b23ca53360ca57106555698ec2e93 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:10 -0700 Subject: perf/x86/intel: Print LBR support statement after validation The following commit: 338b522ca43c ("perf/x86/intel: Protect LBR and extra_regs against KVM lying") added an additional test to LBR support detection that is performed after printing the LBR support statement to dmesg. Move the LBR support output after the very last test, to make sure we print the true status of LBR support. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-2-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 ++ arch/x86/events/intel/lbr.c | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3ed528c2370c..61a027b694a3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3958,6 +3958,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } + if (x86_pmu.lbr_nr) + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* * Access extra MSR may cause #GP under certain circumstances. * E.g. KVM doesn't support offcore event diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9e2b40cdb05f..2dca66cec617 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -956,7 +956,6 @@ void __init intel_pmu_lbr_init_core(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("4-deep LBR, "); } /* nehalem/westmere */ @@ -977,7 +976,6 @@ void __init intel_pmu_lbr_init_nhm(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* sandy bridge */ @@ -997,7 +995,6 @@ void __init intel_pmu_lbr_init_snb(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("16-deep LBR, "); } /* haswell */ @@ -1010,8 +1007,6 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; - - pr_cont("16-deep LBR, "); } /* skylake */ @@ -1031,7 +1026,6 @@ __init void intel_pmu_lbr_init_skl(void) * That requires LBR_FAR but that means far * jmp need to be filtered out */ - pr_cont("32-deep LBR, "); } /* atom */ @@ -1057,7 +1051,6 @@ void __init intel_pmu_lbr_init_atom(void) * SW branch filter usage: * - compensate for lack of HW filter */ - pr_cont("8-deep LBR, "); } /* slm */ @@ -1088,6 +1081,4 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; - - pr_cont("8-deep LBR, "); } -- cgit v1.2.3 From 19fc9ddd61e059cc45464bdf6e8fa304bb94080f Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:11 -0700 Subject: perf/x86/intel: Fix MSR_LAST_BRANCH_FROM_x bug when no TSX Intel's SDM states that bits 61:62 in MSR_LAST_BRANCH_FROM_x are the TSX flags for formats with LBR_TSX flags (i.e. LBR_FORMAT_EIP_EFLAGS2). 
However, when the CPU has TSX support deactivated, bits 61:62 actually behave as follows: - For wrmsr(), bits 61:62 are considered part of the sign extension. - When capturing branches, the LBR hw will always clear bits 61:62. regardless of the sign extension. Therefore, if: 1) LBR has TSX format. 2) CPU has no TSX support enabled. ... then any value passed to wrmsr() must be sign extended to 63 bits and any value from rdmsr() must be converted to have a sign extension of 61 bits, ignoring the values at TSX flags. This bug was masked by the work-around to the Intel's CPU bug: BJ94. "LBR May Contain Incorrect Information When Using FREEZE_LBRS_ON_PMI" in Document Number: 324643-037US. The aforementioned work-around uses hw flags to filter out all kernel branches, limiting LBR callstack to user level execution only. Since user addresses are not sign extended, they do not trigger the wrmsr() bug in MSR_LAST_BRANCH_FROM_x when saved/restored at context switch. To verify the hw bug: $ perf record -b -e cycles sleep 1 $ rdmsr -p 0 0x680 0x1fffffffb0b9b0cc $ wrmsr -p 0 0x680 0x1fffffffb0b9b0cc write(): Input/output error The quirk for LBR_FROM_ MSRs is required before calls to wrmsrl() and after rdmsrl(). This patch introduces it for wrmsrl()'s done for testing LBR support. Future patch in series adds the quirk for context switch, that would be required if LBR callstack is to be enabled for ring 0. Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-3-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 18 +++++++++++++++ arch/x86/events/intel/lbr.c | 52 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/events/perf_event.h | 2 ++ 3 files changed, 72 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 61a027b694a3..3eccc42e2d88 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3361,6 +3361,13 @@ static void intel_snb_check_microcode(void) } } +static bool is_lbr_from(unsigned long msr) +{ + unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr; + + return x86_pmu.lbr_from <= msr && msr < lbr_from_nr; +} + /* * Under certain circumstances, access certain MSR may cause #GP. * The function tests if the input MSR can be safely accessed. @@ -3381,13 +3388,24 @@ static bool check_msr(unsigned long msr, u64 mask) * Only change the bits which can be updated by wrmsrl. */ val_tmp = val_old ^ mask; + + if (is_lbr_from(msr)) + val_tmp = lbr_from_signext_quirk_wr(val_tmp); + if (wrmsrl_safe(msr, val_tmp) || rdmsrl_safe(msr, &val_new)) return false; + /* + * Quirk only affects validation in wrmsr(), so wrmsrl()'s value + * should equal rdmsrl()'s even with the quirk. + */ if (val_new != val_tmp) return false; + if (is_lbr_from(msr)) + val_old = lbr_from_signext_quirk_wr(val_old); + /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. 
*/ diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 2dca66cec617..88093e0915a9 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -81,6 +81,8 @@ static enum { #define LBR_FROM_FLAG_IN_TX (1ULL << 62) #define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) + /* * x86control flow change classification * x86control flow changes include branches, interrupts, traps, faults @@ -235,6 +237,53 @@ enum { LBR_VALID, }; +/* + * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in + * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when + * TSX is not supported they have no consistent behavior: + * + * - For wrmsr(), bits 61:62 are considered part of the sign extension. + * - For HW updates (branch captures) bits 61:62 are always OFF and are not + * part of the sign extension. + * + * Therefore, if: + * + * 1) LBR has TSX format + * 2) CPU has no TSX support enabled + * + * ... then any value passed to wrmsr() must be sign extended to 63 bits and any + * value from rdmsr() must be converted to have a 61 bits sign extension, + * ignoring the TSX flags. + */ +static inline bool lbr_from_signext_quirk_needed(void) +{ + int lbr_format = x86_pmu.intel_cap.lbr_format; + bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || + boot_cpu_has(X86_FEATURE_RTM); + + return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); +} + +DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); + +/* If quirk is enabled, ensure sign extension is 63 bits: */ +inline u64 lbr_from_signext_quirk_wr(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) { + /* + * Sign extend into bits 61:62 while preserving bit 63. + * + * Quirk is enabled when TSX is disabled. Therefore TSX bits + * in val are always OFF and must be changed to be sign + * extension bits. Since bits 59:60 are guaranteed to be + * part of the sign extension bits, we can just copy them + * to 61:62. + */ + val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2; + } + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -1007,6 +1056,9 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map; + + if (lbr_from_signext_quirk_needed()) + static_branch_enable(&lbr_from_quirk_key); } /* skylake */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index e2d7285a2dac..8c4a47706296 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -902,6 +902,8 @@ void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); +u64 lbr_from_signext_quirk_wr(u64 val); + void intel_pmu_lbr_reset(void); void intel_pmu_lbr_enable(struct perf_event *event); -- cgit v1.2.3 From 3812bba84f3d721ff7dc3bb90360bc5ed6771994 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:12 -0700 Subject: perf/x86/intel: Fix trivial formatting and style bug Replace spaces by tabs in LBR_FROM_* constants to align with newly defined constant. Use BIT_ULL. 
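The rewrite is purely cosmetic: BIT_ULL(n) is the unsigned long long form of BIT(), i.e. a one shifted left by n bits, so every constant keeps its previous value. A tiny standalone check of that equivalence, where MY_BIT_ULL is a local stand-in for the kernel macro:

#include <assert.h>

#define MY_BIT_ULL(n)	(1ULL << (n))	/* stand-in for the kernel's BIT_ULL() */

int main(void)
{
	assert(MY_BIT_ULL(63) == (1ULL << 63));	/* LBR_FROM_FLAG_MISPRED */
	assert(MY_BIT_ULL(62) == (1ULL << 62));	/* LBR_FROM_FLAG_IN_TX   */
	assert(MY_BIT_ULL(61) == (1ULL << 61));	/* LBR_FROM_FLAG_ABORT   */
	return 0;
}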
Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-4-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 88093e0915a9..0da0eb0d875d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -77,9 +77,9 @@ static enum { LBR_IND_JMP |\ LBR_FAR) -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) -#define LBR_FROM_FLAG_IN_TX (1ULL << 62) -#define LBR_FROM_FLAG_ABORT (1ULL << 61) +#define LBR_FROM_FLAG_MISPRED BIT_ULL(63) +#define LBR_FROM_FLAG_IN_TX BIT_ULL(62) +#define LBR_FROM_FLAG_ABORT BIT_ULL(61) #define LBR_FROM_SIGNEXT_2MSB (BIT_ULL(60) | BIT_ULL(59)) -- cgit v1.2.3 From 71adae99ed187de9fcf988cc8873ee2c3af3385f Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Tue, 21 Jun 2016 11:31:13 -0700 Subject: perf/x86/intel: Add MSR_LAST_BRANCH_FROM_x quirk for ctx switch Add quirk for context switch to save/restore the value of MSR_LAST_BRANCH_FROM_x when LBR is enabled and there is potential for kernel addresses to be in the lbr_from register. To test this patch, use a perf tool and kernel with the patch next in this series. That patch removes the work around that masked the hw bug: $ ./lbr_perf record --call-graph lbr -e cycles:k sleep 1 where lbr_perf is the patched perf tool, that allows to specify :k on lbr mode. The above command will trigger a #GPF : WARNING: CPU: 28 PID: 14096 at arch/x86/mm/extable.c:65 ex_handler_wrmsr_unsafe+0x70/0x80 unchecked MSR access error: WRMSR to 0x681 (tried to write 0x1fffffff81010794) ... Call Trace: [] dump_stack+0x4d/0x63 [] __warn+0xe5/0x100 [] warn_slowpath_fmt+0x49/0x50 [] ex_handler_wrmsr_unsafe+0x70/0x80 [] fixup_exception+0x42/0x50 [] do_general_protection+0x8a/0x160 [] general_protection+0x22/0x30 [] ? intel_pmu_lbr_sched_task+0xc9/0x380 [] intel_pmu_sched_task+0x3c/0x60 [] x86_pmu_sched_task+0x1b/0x20 [] perf_pmu_sched_task+0x6b/0xb0 [] __perf_event_task_sched_in+0x7d/0x150 [] finish_task_switch+0x15c/0x200 [] __schedule+0x274/0x6cc [] schedule+0x39/0x90 [] exit_to_usermode_loop+0x39/0x89 [] prepare_exit_to_usermode+0x2e/0x30 [] retint_user+0x8/0x10 Signed-off-by: David Carrillo-Cisneros Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Stephane Eranian Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1466533874-52003-5-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 0da0eb0d875d..52bef15c7615 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -284,6 +284,20 @@ inline u64 lbr_from_signext_quirk_wr(u64 val) return val; } +/* + * If quirk is needed, ensure sign extension is 61 bits: + */ +u64 lbr_from_signext_quirk_rd(u64 val) +{ + if (static_branch_unlikely(&lbr_from_quirk_key)) + /* + * Quirk is on when TSX is not enabled. 
Therefore TSX + * flags must be read as OFF. + */ + val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + return val; +} + static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) { int i; @@ -300,7 +314,8 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + wrmsrl(x86_pmu.lbr_from + lbr_idx, + lbr_from_signext_quirk_wr(task_ctx->lbr_from[i])); wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); @@ -313,7 +328,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { int i; unsigned lbr_idx, mask; - u64 tos; + u64 tos, val; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -324,7 +339,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]); + rdmsrl(x86_pmu.lbr_from + lbr_idx, val); + task_ctx->lbr_from[i] = lbr_from_signext_quirk_rd(val); rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); @@ -502,6 +518,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) int lbr_flags = lbr_desc[lbr_format]; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); + from = lbr_from_signext_quirk_rd(from); + rdmsrl(x86_pmu.lbr_to + lbr_idx, to); if (lbr_format == LBR_FORMAT_INFO && need_info) { -- cgit v1.2.3 From d4cf1949f9689314aef962eea95df84a8288d097 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jun 2016 10:44:49 +0200 Subject: perf/x86/intel: Add {rd,wr}lbr_{to,from} wrappers The whole rdmsr()/wrmsr() for lbr_from got a little unweildy with the sign extension quirk, provide a few simple wrappers to clean things up. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 53 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 52bef15c7615..cc4555a9e876 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -289,12 +289,42 @@ inline u64 lbr_from_signext_quirk_wr(u64 val) */ u64 lbr_from_signext_quirk_rd(u64 val) { - if (static_branch_unlikely(&lbr_from_quirk_key)) + if (static_branch_unlikely(&lbr_from_quirk_key)) { /* * Quirk is on when TSX is not enabled. Therefore TSX * flags must be read as OFF. 
*/ val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT); + } + return val; +} + +static inline void wrlbr_from(unsigned int idx, u64 val) +{ + val = lbr_from_signext_quirk_wr(val); + wrmsrl(x86_pmu.lbr_from + idx, val); +} + +static inline void wrlbr_to(unsigned int idx, u64 val) +{ + wrmsrl(x86_pmu.lbr_to + idx, val); +} + +static inline u64 rdlbr_from(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + + return lbr_from_signext_quirk_rd(val); +} + +static inline u64 rdlbr_to(unsigned int idx) +{ + u64 val; + + rdmsrl(x86_pmu.lbr_from + idx, val); + return val; } @@ -314,9 +344,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) tos = task_ctx->tos; for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - wrmsrl(x86_pmu.lbr_from + lbr_idx, - lbr_from_signext_quirk_wr(task_ctx->lbr_from[i])); - wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + wrlbr_from(lbr_idx, task_ctx->lbr_from[i]); + wrlbr_to (lbr_idx, task_ctx->lbr_to[i]); + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -326,9 +356,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx) static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) { - int i; unsigned lbr_idx, mask; - u64 tos, val; + u64 tos; + int i; if (task_ctx->lbr_callstack_users == 0) { task_ctx->lbr_stack_state = LBR_NONE; @@ -339,9 +369,8 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx) tos = intel_pmu_lbr_tos(); for (i = 0; i < tos; i++) { lbr_idx = (tos - i) & mask; - rdmsrl(x86_pmu.lbr_from + lbr_idx, val); - task_ctx->lbr_from[i] = lbr_from_signext_quirk_rd(val); - rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]); + task_ctx->lbr_from[i] = rdlbr_from(lbr_idx); + task_ctx->lbr_to[i] = rdlbr_to(lbr_idx); if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]); } @@ -517,10 +546,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u16 cycles = 0; int lbr_flags = lbr_desc[lbr_format]; - rdmsrl(x86_pmu.lbr_from + lbr_idx, from); - from = lbr_from_signext_quirk_rd(from); - - rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + from = rdlbr_from(lbr_idx); + to = rdlbr_to(lbr_idx); if (lbr_format == LBR_FORMAT_INFO && need_info) { u64 info; -- cgit v1.2.3 From dbf984d825935f61965bcfacfd8e8dfdaf3e8051 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 25 Jun 2016 13:24:57 +0200 Subject: x86/boot/64: Add forgotten end of function marker Add secondary_startup_64()'s ENDPROC() marker. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20160625112457.16930-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 5df831ef1442..c7920ba69563 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -299,6 +299,7 @@ ENTRY(secondary_startup_64) pushq $__KERNEL_CS # set correct cs pushq %rax # target address in negative space lretq +ENDPROC(secondary_startup_64) #include "verify_cpu.S" -- cgit v1.2.3 From f6d1747f898cfe1fe52e3d18f5c77e5bd21fed9a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 25 Jun 2016 08:20:26 +0100 Subject: x86/efi: Remove unused variable 'efi' Remove unused variable 'efi', it is never used. This fixes the following clang build warning: arch/x86/boot/compressed/eboot.c:803:2: warning: Value stored to 'efi' is never read Signed-off-by: Colin Ian King Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-4-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 52fef606bc54..ff574dad95cc 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -757,7 +757,6 @@ struct boot_params *make_boot_params(struct efi_config *c) struct boot_params *boot_params; struct apm_bios_info *bi; struct setup_header *hdr; - struct efi_info *efi; efi_loaded_image_t *image; void *options, *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; @@ -800,7 +799,6 @@ struct boot_params *make_boot_params(struct efi_config *c) memset(boot_params, 0x0, 0x4000); hdr = &boot_params->hdr; - efi = &boot_params->efi_info; bi = &boot_params->apm_bios_info; /* Copy the second sector to boot_params */ -- cgit v1.2.3 From 80e75596079f0a41f905836ad0ccaac68ba33612 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Sat, 25 Jun 2016 08:20:27 +0100 Subject: efi: Convert efi_call_virt() to efi_call_virt_pointer() This commit makes a few slight modifications to the efi_call_virt() macro to get it to work with function pointers that are stored in locations other than efi.systab->runtime, and renames the macro to efi_call_virt_pointer(). The majority of the changes here are to pull these macros up into header files so that they can be accessed from outside of drivers/firmware/efi/runtime-wrappers.c. The most significant change not directly related to the code move is to add an extra "p" argument into the appropriate efi_call macros, and use that new argument in place of the, formerly hard-coded, efi.systab->runtime pointer. The last piece of the puzzle was to add an efi_call_virt() macro back into drivers/firmware/efi/runtime-wrappers.c to wrap around the new efi_call_virt_pointer() macro - this was mainly to keep the code from looking too cluttered by adding a bunch of extra references to efi.systab->runtime everywhere. Note that I also broke up the code in the efi_call_virt_pointer() macro a bit in the process of moving it. 
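For callers inside runtime-wrappers.c nothing changes in spelling, since efi_call_virt()/__efi_call_virt() are re-created there as thin wrappers around the new macros, while code elsewhere can now pass whichever table holds the function pointers. A hedged usage sketch, where the wrapper function is hypothetical but get_time and efi.systab->runtime are the usual EFI runtime service and table:

/* Sketch only: with efi.systab->runtime as the table pointer this is
 * equivalent to what efi_call_virt(get_time, tm, tc) expanded to before.
 */
static efi_status_t example_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
	return efi_call_virt_pointer(efi.systab->runtime, get_time, tm, tc);
}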
Signed-off-by: Alex Thorlton Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Roy Franz Cc: Russ Anderson Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-5-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/arm/include/asm/efi.h | 4 +-- arch/arm64/include/asm/efi.h | 4 +-- arch/x86/include/asm/efi.h | 9 +++--- drivers/firmware/efi/runtime-wrappers.c | 53 +++++++-------------------------- include/linux/efi.h | 51 +++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index a708fa1f0905..766bf9b78160 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -28,10 +28,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() efi_virtmap_load() #define arch_efi_call_virt_teardown() efi_virtmap_unload() -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 622db3c6474e..bd887663689b 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -23,10 +23,10 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); efi_virtmap_load(); \ }) -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..55b4596ef688 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -41,10 +41,9 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); /* * Wrap all the virtual calls in a way that forces the parameters on the stack. */ -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ - ((efi_##f##_t __attribute__((regparm(0)))*) \ - efi.systab->runtime->f)(args); \ + ((efi_##f##_t __attribute__((regparm(0)))*) p->f)(args); \ }) #define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) @@ -81,8 +80,8 @@ struct efi_scratch { } \ }) -#define arch_efi_call_virt(f, args...) \ - efi_call((void *)efi.systab->runtime->f, args) \ +#define arch_efi_call_virt(p, f, args...) \ + efi_call((void *)p->f, args) \ #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 23bef6bb73ee..41958774cde3 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -22,7 +22,16 @@ #include #include -static void efi_call_virt_check_flags(unsigned long flags, const char *call) +/* + * Wrap around the new efi_call_virt_generic() macros so that the + * code doesn't get too cluttered: + */ +#define efi_call_virt(f, args...) \ + efi_call_virt_pointer(efi.systab->runtime, f, args) +#define __efi_call_virt(f, args...) 
\ + __efi_call_virt_pointer(efi.systab->runtime, f, args) + +void efi_call_virt_check_flags(unsigned long flags, const char *call) { unsigned long cur_flags, mismatch; @@ -38,48 +47,6 @@ static void efi_call_virt_check_flags(unsigned long flags, const char *call) local_irq_restore(flags); } -/* - * Arch code can implement the following three template macros, avoiding - * reptition for the void/non-void return cases of {__,}efi_call_virt: - * - * * arch_efi_call_virt_setup - * - * Sets up the environment for the call (e.g. switching page tables, - * allowing kernel-mode use of floating point, if required). - * - * * arch_efi_call_virt - * - * Performs the call. The last expression in the macro must be the call - * itself, allowing the logic to be shared by the void and non-void - * cases. - * - * * arch_efi_call_virt_teardown - * - * Restores the usual kernel environment once the call has returned. - */ - -#define efi_call_virt(f, args...) \ -({ \ - efi_status_t __s; \ - unsigned long flags; \ - arch_efi_call_virt_setup(); \ - local_save_flags(flags); \ - __s = arch_efi_call_virt(f, args); \ - efi_call_virt_check_flags(flags, __stringify(f)); \ - arch_efi_call_virt_teardown(); \ - __s; \ -}) - -#define __efi_call_virt(f, args...) \ -({ \ - unsigned long flags; \ - arch_efi_call_virt_setup(); \ - local_save_flags(flags); \ - arch_efi_call_virt(f, args); \ - efi_call_virt_check_flags(flags, __stringify(f)); \ - arch_efi_call_virt_teardown(); \ -}) - /* * According to section 7.1 of the UEFI spec, Runtime Services are not fully * reentrant, and there are particular combinations of calls that need to be diff --git a/include/linux/efi.h b/include/linux/efi.h index 03009695760d..75d148dc9c3f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1480,4 +1480,55 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, unsigned long size); bool efi_runtime_disabled(void); +extern void efi_call_virt_check_flags(unsigned long flags, const char *call); + +/* + * Arch code can implement the following three template macros, avoiding + * reptition for the void/non-void return cases of {__,}efi_call_virt(): + * + * * arch_efi_call_virt_setup() + * + * Sets up the environment for the call (e.g. switching page tables, + * allowing kernel-mode use of floating point, if required). + * + * * arch_efi_call_virt() + * + * Performs the call. The last expression in the macro must be the call + * itself, allowing the logic to be shared by the void and non-void + * cases. + * + * * arch_efi_call_virt_teardown() + * + * Restores the usual kernel environment once the call has returned. + */ + +#define efi_call_virt_pointer(p, f, args...) \ +({ \ + efi_status_t __s; \ + unsigned long __flags; \ + \ + arch_efi_call_virt_setup(); \ + \ + local_save_flags(__flags); \ + __s = arch_efi_call_virt(p, f, args); \ + efi_call_virt_check_flags(__flags, __stringify(f)); \ + \ + arch_efi_call_virt_teardown(); \ + \ + __s; \ +}) + +#define __efi_call_virt_pointer(p, f, args...) 
\ +({ \ + unsigned long __flags; \ + \ + arch_efi_call_virt_setup(); \ + \ + local_save_flags(__flags); \ + arch_efi_call_virt(p, f, args); \ + efi_call_virt_check_flags(__flags, __stringify(f)); \ + \ + arch_efi_call_virt_teardown(); \ +}) + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From d1be84a232e359ca9456c63e72cb0082d68311b6 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Sat, 25 Jun 2016 08:20:28 +0100 Subject: x86/uv: Update uv_bios_call() to use efi_call_virt_pointer() Now that the efi_call_virt() macro has been generalized to be able to use EFI system tables besides efi.systab, we are able to convert our uv_bios_call() wrapper to use this standard EFI callback mechanism. This simple change is part of a much larger effort to recover from some issues with the way we were mapping in some of our MMRs, and the way that we were doing our BIOS callbacks, which were uncovered by commit 67a9108ed431 ("x86/efi: Build our own page table structures"). The first issue that this uncovered was that we were relying on the EFI memory mapping mechanism to map in our MMR space for us, which, while reliable, was technically a bug, as it relied on "undefined" behavior in the mapping code. The reason we were able to piggyback on the EFI memory mapping code to map in our MMRs was because, previously, EFI code used the trampoline_pgd, which shares a few entries with the main kernel pgd. It just so happened, that the memory range containing our MMRs was inside one of those shared regions, which kept our code working without issue for quite a while. Anyways, once we discovered this problem, we brought back our original code to map in the MMRs with commit: 08914f436bdd ("x86/platform/UV: Bring back the call to map_low_mmrs in uv_system_init") This got our systems a little further along, but we were still running into trouble with our EFI callbacks, which prevented us from booting all the way up. Our first step towards fixing the BIOS callbacks was to get our uv_bios_call() wrapper updated to use efi_call_virt() instead of the plain efi_call(). The previous patch took care of the effort needed to make that possible. Along the way, we hit a major issue with some confusion about how to properly pull arguments higher than number 6 off the stack in the efi_call() code, which resulted in the following commit from Linus: 683ad8092cd2 ("x86/efi: Fix 7-parameter efi_call()s") Now that all of those issues are out of the way, we're able to make this simple change to use the new efi_call_virt_pointer() in uv_bios_call() which gets our machines booting, running properly, and able to execute our callbacks with 6+ arguments. Note that, since we are now using the EFI page table when we make our function call, we are no longer able to make the call using the __va() of our function pointer, since the memory range containing that address isn't mapped into the EFI page table. For now, we will use the physical address of the function directly, since that is mapped into the EFI page table. In the near future, we're going to get some code added in to properly update our function pointer to its virtual address during SetVirtualAddressMap. 
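Stripped of context, the uv_bios_call() change in the diff below is a one-liner; the comments here spell out why it matters: the call has to go through an address that the EFI page table actually maps.

/* Before: jumps through a direct-map (__va) alias of the BIOS entry point,
 * which is not present in the dedicated EFI page table, so the call faults.
 */
ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);

/* After: the generic wrapper switches to the EFI page table around the call
 * and uses the physical address stored in the UV system table, which that
 * page table does map.
 */
ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);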
Signed-off-by: Alex Thorlton Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Roy Franz Cc: Russ Anderson Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-6-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/bios_uv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 815fec6e05e2..66b2166ea4a1 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -40,8 +40,7 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call((void *)__va(tab->function), (u64)which, - a1, a2, a3, a4, a5); + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); return ret; } EXPORT_SYMBOL_GPL(uv_bios_call); -- cgit v1.2.3 From 21f866257c7027f8f49bfde83f559f9e58f9ee93 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Sat, 25 Jun 2016 08:20:29 +0100 Subject: x86/efi: Update efi_thunk() to use the the arch_efi_call_virt*() macros Currently, the efi_thunk macro has some semi-duplicated code in it that can be replaced with the arch_efi_call_virt_setup/teardown macros. This commit simply replaces the duplicated code with those macros. Suggested-by: Matt Fleming Signed-off-by: Alex Thorlton Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Roy Franz Cc: Russ Anderson Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-7-git-send-email-matt@codeblueprint.co.uk [ Renamed variables to the standard __ prefix. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b226b3f497f1..5cb4301c4dcf 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -466,22 +466,17 @@ extern efi_status_t efi64_thunk(u32, ...); #define efi_thunk(f, ...) \ ({ \ efi_status_t __s; \ - unsigned long flags; \ - u32 func; \ + unsigned long __flags; \ + u32 __func; \ \ - efi_sync_low_kernel_mappings(); \ - local_irq_save(flags); \ + local_irq_save(__flags); \ + arch_efi_call_virt_setup(); \ \ - efi_scratch.prev_cr3 = read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ + __func = runtime_service32(f); \ + __s = efi64_thunk(__func, __VA_ARGS__); \ \ - func = runtime_service32(f); \ - __s = efi64_thunk(func, __VA_ARGS__); \ - \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - local_irq_restore(flags); \ + arch_efi_call_virt_teardown(); \ + local_irq_restore(__flags); \ \ __s; \ }) -- cgit v1.2.3 From b684e9bc750b6349ff59f1b1ab4397cae255765f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 25 Jun 2016 08:20:30 +0100 Subject: x86/efi: Remove the unused efi_get_time() function Nothing calls the efi_get_time() function on x86, but it does suffer from the 32-bit time_t overflow in 2038. This removes the function, we can always put it back in case we need it later. 
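For reference, the 2038 limit comes from keeping seconds since 1970-01-01 in a signed 32-bit quantity, which tops out at 2^31 - 1 = 2147483647. A small userspace illustration, independent of the kernel code being removed:

#include <stdio.h>
#include <time.h>

int main(void)
{
	/* Largest count of seconds a signed 32-bit time_t can represent. */
	time_t last = 2147483647;

	/* Prints "Tue Jan 19 03:14:07 2038" (UTC); one second later a
	 * 32-bit counter wraps and jumps back to December 1901.
	 */
	printf("%s", asctime(gmtime(&last)));
	return 0;
}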
Signed-off-by: Arnd Bergmann Signed-off-by: Matt Fleming Acked-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1466839230-12781-8-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 15 --------------- include/linux/efi.h | 1 - 2 files changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..d898b334ff46 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -98,21 +98,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -void efi_get_time(struct timespec *now) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - status = efi.get_time(&eft, &cap); - if (status != EFI_SUCCESS) - pr_err("Oops: efitime: can't read time!\n"); - - now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); - now->tv_nsec = 0; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; diff --git a/include/linux/efi.h b/include/linux/efi.h index 75d148dc9c3f..0174f28ce9b3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -990,7 +990,6 @@ extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); -extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params); extern struct kobject *efi_kobj; -- cgit v1.2.3 From 1e8567d53d3bbefe3f6e0a27685da1b842f4c1fa Mon Sep 17 00:00:00 2001 From: Huang Tao Date: Thu, 16 Jun 2016 16:18:58 +0200 Subject: arm64: dts: rockchip: Add rktimer device node for rk3399 Add a 'rktimer' node in the device treee for the ARM64 rk3399 SoC. Signed-off-by: Huang Tao Cc: Daniel Lezcano Cc: Thomas Gleixner Cc: Heiko Stuebner Tested-by: Jianqun Xu Signed-off-by: Caesar Wang Signed-off-by: Daniel Lezcano --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 46f325a143b0..f0c0d7665102 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -492,6 +492,14 @@ interrupts = ; }; + rktimer: rktimer@ff850000 { + compatible = "rockchip,rk3399-timer"; + reg = <0x0 0xff850000 0x0 0x1000>; + interrupts = ; + clocks = <&cru PCLK_TIMER0>, <&cru SCLK_TIMER00>; + clock-names = "pclk", "timer"; + }; + spdif: spdif@ff870000 { compatible = "rockchip,rk3399-spdif"; reg = <0x0 0xff870000 0x0 0x1000>; -- cgit v1.2.3 From 0586421746ef2bc33898d2d7f3dbb0eec6b234c3 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:03:34 +0200 Subject: clocksource/drivers/microblaze: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. 
Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- arch/microblaze/kernel/timer.c | 51 +++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 67e2ef48d2d0..7f35e7b50f1b 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -170,7 +170,7 @@ static struct irqaction timer_irqaction = { .dev_id = &clockevent_xilinx_timer, }; -static __init void xilinx_clockevent_init(void) +static __init int xilinx_clockevent_init(void) { clockevent_xilinx_timer.mult = div_sc(timer_clock_freq, NSEC_PER_SEC, @@ -181,6 +181,8 @@ static __init void xilinx_clockevent_init(void) clockevent_delta2ns(1, &clockevent_xilinx_timer); clockevent_xilinx_timer.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_xilinx_timer); + + return 0; } static u64 xilinx_clock_read(void) @@ -229,8 +231,14 @@ static struct clocksource clocksource_microblaze = { static int __init xilinx_clocksource_init(void) { - if (clocksource_register_hz(&clocksource_microblaze, timer_clock_freq)) - panic("failed to register clocksource"); + int ret; + + ret = clocksource_register_hz(&clocksource_microblaze, + timer_clock_freq); + if (ret) { + pr_err("failed to register clocksource"); + return ret; + } /* stop timer1 */ write_fn(read_fn(timer_baseaddr + TCSR1) & ~TCSR_ENT, @@ -239,16 +247,16 @@ static int __init xilinx_clocksource_init(void) write_fn(TCSR_TINT|TCSR_ENT|TCSR_ARHT, timer_baseaddr + TCSR1); /* register timecounter - for ftrace support */ - init_xilinx_timecounter(); - return 0; + return init_xilinx_timecounter(); } -static void __init xilinx_timer_init(struct device_node *timer) +static int __init xilinx_timer_init(struct device_node *timer) { struct clk *clk; static int initialized; u32 irq; u32 timer_num = 1; + int ret; if (initialized) return; @@ -258,7 +266,7 @@ static void __init xilinx_timer_init(struct device_node *timer) timer_baseaddr = of_iomap(timer, 0); if (!timer_baseaddr) { pr_err("ERROR: invalid timer base address\n"); - BUG(); + return -ENXIO; } write_fn = timer_write32; @@ -271,11 +279,15 @@ static void __init xilinx_timer_init(struct device_node *timer) } irq = irq_of_parse_and_map(timer, 0); + if (irq <= 0) { + pr_err("Failed to parse and map irq"); + return -EINVAL; + } of_property_read_u32(timer, "xlnx,one-timer-only", &timer_num); if (timer_num) { - pr_emerg("Please enable two timers in HW\n"); - BUG(); + pr_err("Please enable two timers in HW\n"); + return -EINVAL; } pr_info("%s: irq=%d\n", timer->full_name, irq); @@ -297,15 +309,28 @@ static void __init xilinx_timer_init(struct device_node *timer) freq_div_hz = timer_clock_freq / HZ; - setup_irq(irq, &timer_irqaction); + ret = setup_irq(irq, &timer_irqaction); + if (ret) { + pr_err("Failed to setup IRQ"); + return ret; + } + #ifdef CONFIG_HEART_BEAT microblaze_setup_heartbeat(); #endif - xilinx_clocksource_init(); - xilinx_clockevent_init(); + + ret = xilinx_clocksource_init(); + if (ret) + return ret; + + ret = xilinx_clockevent_init(); + if (ret) + return ret; sched_clock_register(xilinx_clock_read, 32, timer_clock_freq); + + return 0; } -CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", +CLOCKSOURCE_OF_DECLARE_RET(xilinx_timer, "xlnx,xps-timer-1.00.a", xilinx_timer_init); -- cgit v1.2.3 From 
2712616fed84ddf60c788269d39bb26eb6779945 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:03:49 +0200 Subject: clocksource/drivers/ralink: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- arch/mips/ralink/cevt-rt3352.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index 3ad0b0794f7d..f2d3c7908626 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -117,11 +117,13 @@ static int systick_set_oneshot(struct clock_event_device *evt) return 0; } -static void __init ralink_systick_init(struct device_node *np) +static int __init ralink_systick_init(struct device_node *np) { + int ret; + systick.membase = of_iomap(np, 0); if (!systick.membase) - return; + return -ENXIO; systick_irqaction.name = np->name; systick.dev.name = np->name; @@ -131,16 +133,21 @@ static void __init ralink_systick_init(struct device_node *np) systick.dev.irq = irq_of_parse_and_map(np, 0); if (!systick.dev.irq) { pr_err("%s: request_irq failed", np->name); - return; + return -EINVAL; } - clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, - SYSTICK_FREQ, 301, 16, clocksource_mmio_readl_up); + ret = clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name, + SYSTICK_FREQ, 301, 16, + clocksource_mmio_readl_up); + if (ret) + return ret; clockevents_register_device(&systick.dev); pr_info("%s: running - mult: %d, shift: %d\n", np->name, systick.dev.mult, systick.dev.shift); + + return 0; } -CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); +CLOCKSOURCE_OF_DECLARE_RET(systick, "ralink,cevt-systick", ralink_systick_init); -- cgit v1.2.3 From dd1364a7439be4d20f87637a72eb7bd4553827f0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:04:02 +0200 Subject: clocksource/drivers/nios2: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. 
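The conversion pattern is the same in each of these drivers: every early-exit path that used to panic() or silently return now reports an errno to the probing code. A condensed skeleton of a converted init function is shown below; the compatible string, register offset and clock rate are placeholders rather than values from any real driver, and the _RET macro name is the transitional one in use at this point in the series (a later patch renames it back).

#include <linux/clocksource.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>

#define EXAMPLE_COUNT_REG	0x00		/* placeholder register offset */
#define EXAMPLE_FREQ		1000000		/* placeholder clock rate (Hz) */

static int __init example_timer_init(struct device_node *np)
{
	void __iomem *base;
	int irq;

	base = of_iomap(np, 0);
	if (!base) {
		pr_err("%s: unable to map timer registers\n", np->name);
		return -ENXIO;
	}

	irq = irq_of_parse_and_map(np, 0);
	if (!irq) {
		pr_err("%s: failed to parse and map irq\n", np->name);
		return -EINVAL;
	}
	/* A real driver would go on to register a clockevent on this irq. */

	return clocksource_mmio_init(base + EXAMPLE_COUNT_REG, np->name,
				     EXAMPLE_FREQ, 300, 32,
				     clocksource_mmio_readl_up);
}
CLOCKSOURCE_OF_DECLARE_RET(example_timer, "vendor,example-timer",
			   example_timer_init);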
Signed-off-by: Daniel Lezcano --- arch/nios2/kernel/time.c | 65 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index e835dda2bfe2..b75e40e17963 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -206,15 +206,21 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void __init nios2_timer_get_base_and_freq(struct device_node *np, +static int __init nios2_timer_get_base_and_freq(struct device_node *np, void __iomem **base, u32 *freq) { *base = of_iomap(np, 0); - if (!*base) - panic("Unable to map reg for %s\n", np->name); + if (!*base) { + pr_crit("Unable to map reg for %s\n", np->name); + return -ENXIO; + } + + if (of_property_read_u32(np, "clock-frequency", freq)) { + pr_crit("Unable to get %s clock frequency\n", np->name); + return -EINVAL; + } - if (of_property_read_u32(np, "clock-frequency", freq)) - panic("Unable to get %s clock frequency\n", np->name); + return 0; } static struct nios2_clockevent_dev nios2_ce = { @@ -231,17 +237,21 @@ static struct nios2_clockevent_dev nios2_ce = { }, }; -static __init void nios2_clockevent_init(struct device_node *timer) +static __init int nios2_clockevent_init(struct device_node *timer) { void __iomem *iobase; u32 freq; - int irq; + int irq, ret; - nios2_timer_get_base_and_freq(timer, &iobase, &freq); + ret = nios2_timer_get_base_and_freq(timer, &iobase, &freq); + if (ret) + return ret; irq = irq_of_parse_and_map(timer, 0); - if (!irq) - panic("Unable to parse timer irq\n"); + if (!irq) { + pr_crit("Unable to parse timer irq\n"); + return -EINVAL; + } nios2_ce.timer.base = iobase; nios2_ce.timer.freq = freq; @@ -253,25 +263,35 @@ static __init void nios2_clockevent_init(struct device_node *timer) /* clear pending interrupt */ timer_writew(&nios2_ce.timer, 0, ALTERA_TIMER_STATUS_REG); - if (request_irq(irq, timer_interrupt, IRQF_TIMER, timer->name, - &nios2_ce.ced)) - panic("Unable to setup timer irq\n"); + ret = request_irq(irq, timer_interrupt, IRQF_TIMER, timer->name, + &nios2_ce.ced); + if (ret) { + pr_crit("Unable to setup timer irq\n"); + return ret; + } clockevents_config_and_register(&nios2_ce.ced, freq, 1, ULONG_MAX); + + return 0; } -static __init void nios2_clocksource_init(struct device_node *timer) +static __init int nios2_clocksource_init(struct device_node *timer) { unsigned int ctrl; void __iomem *iobase; u32 freq; + int ret; - nios2_timer_get_base_and_freq(timer, &iobase, &freq); + ret = nios2_timer_get_base_and_freq(timer, &iobase, &freq); + if (ret) + return ret; nios2_cs.timer.base = iobase; nios2_cs.timer.freq = freq; - clocksource_register_hz(&nios2_cs.cs, freq); + ret = clocksource_register_hz(&nios2_cs.cs, freq); + if (ret) + return ret; timer_writew(&nios2_cs.timer, USHRT_MAX, ALTERA_TIMER_PERIODL_REG); timer_writew(&nios2_cs.timer, USHRT_MAX, ALTERA_TIMER_PERIODH_REG); @@ -282,6 +302,8 @@ static __init void nios2_clocksource_init(struct device_node *timer) /* Calibrate the delay loop directly */ lpj_fine = freq / HZ; + + return 0; } /* @@ -289,22 +311,25 @@ static __init void nios2_clocksource_init(struct device_node *timer) * more instances, the second one gets used as clocksource and all * others are unused. 
*/ -static void __init nios2_time_init(struct device_node *timer) +static int __init nios2_time_init(struct device_node *timer) { static int num_called; + int ret; switch (num_called) { case 0: - nios2_clockevent_init(timer); + ret = nios2_clockevent_init(timer); break; case 1: - nios2_clocksource_init(timer); + ret = nios2_clocksource_init(timer); break; default: break; } num_called++; + + return ret; } void read_persistent_clock(struct timespec *ts) @@ -327,4 +352,4 @@ void __init time_init(void) clocksource_probe(); } -CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); +CLOCKSOURCE_OF_DECLARE_RET(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); -- cgit v1.2.3 From dcbc0eddcbbf441fdcf0eb4c2e9c1716ac235972 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:03:11 +0200 Subject: clocksource/drivers/smp_twd: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- arch/arm/kernel/smp_twd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 1bfa7a7f5533..2b24be41d9cc 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -390,7 +390,7 @@ int __init twd_local_timer_register(struct twd_local_timer *tlt) } #ifdef CONFIG_OF -static void __init twd_local_timer_of_register(struct device_node *np) +static int __init twd_local_timer_of_register(struct device_node *np) { int err; @@ -410,8 +410,9 @@ static void __init twd_local_timer_of_register(struct device_node *np) out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); + return err; } -CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE_RET(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); #endif -- cgit v1.2.3 From 43d7560494a264a34e8bb5257ef43b0be6134dac Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 15 Jun 2016 14:50:12 +0200 Subject: clocksource/drivers/arc: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. 
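On the consuming side, clocksource_probe() is what gives these return values meaning: once every init function reports status, the probe loop can log a failure and skip that timer instead of continuing blindly. Roughly, condensed from the shape visible in the clksrc-probe.c hunks further down (error message text approximate), the loop ends up as:

void __init clocksource_probe(void)
{
	struct device_node *np;
	const struct of_device_id *match;
	of_init_fn_1_ret init_func_ret;
	int ret;

	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
		if (!of_device_is_available(np))
			continue;

		init_func_ret = match->data;

		ret = init_func_ret(np);
		if (ret) {
			pr_err("Failed to initialize '%s': %d\n",
			       of_node_full_name(np), ret);
			continue;
		}
	}
}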
Signed-off-by: Daniel Lezcano --- arch/arc/kernel/time.c | 69 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 4549ab255dd1..09de669f4ff0 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -116,21 +116,21 @@ static struct clocksource arc_counter_gfrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_gfrc(struct device_node *node) +static int __init arc_cs_setup_gfrc(struct device_node *node) { int exists = cpuinfo_arc700[0].extn.gfrc; int ret; if (WARN(!exists, "Global-64-bit-Ctr clocksource not detected")) - return; + return -ENXIO; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; - clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); + return clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); +CLOCKSOURCE_OF_DECLARE_RET(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); #endif @@ -172,27 +172,27 @@ static struct clocksource arc_counter_rtc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_rtc(struct device_node *node) +static int __init arc_cs_setup_rtc(struct device_node *node) { int exists = cpuinfo_arc700[smp_processor_id()].extn.rtc; int ret; if (WARN(!exists, "Local-64-bit-Ctr clocksource not detected")) - return; + return -ENXIO; /* Local to CPU hence not usable in SMP */ if (WARN(IS_ENABLED(CONFIG_SMP), "Local-64-bit-Ctr not usable in SMP")) - return; + return -EINVAL; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; write_aux_reg(AUX_RTC_CTRL, 1); - clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); + return clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); +CLOCKSOURCE_OF_DECLARE_RET(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); #endif @@ -213,23 +213,23 @@ static struct clocksource arc_counter_timer1 = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static void __init arc_cs_setup_timer1(struct device_node *node) +static int __init arc_cs_setup_timer1(struct device_node *node) { int ret; /* Local to CPU hence not usable in SMP */ if (IS_ENABLED(CONFIG_SMP)) - return; + return -EINVAL; ret = arc_get_timer_clk(node); if (ret) - return; + return ret; write_aux_reg(ARC_REG_TIMER1_LIMIT, ARC_TIMER_MAX); write_aux_reg(ARC_REG_TIMER1_CNT, 0); write_aux_reg(ARC_REG_TIMER1_CTRL, TIMER_CTRL_NH); - clocksource_register_hz(&arc_counter_timer1, arc_timer_freq); + return clocksource_register_hz(&arc_counter_timer1, arc_timer_freq); } /********** Clock Event Device *********/ @@ -324,20 +324,28 @@ static struct notifier_block arc_timer_cpu_nb = { /* * clockevent setup for boot CPU */ -static void __init arc_clockevent_setup(struct device_node *node) +static int __init arc_clockevent_setup(struct device_node *node) { struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); int ret; - register_cpu_notifier(&arc_timer_cpu_nb); + ret = register_cpu_notifier(&arc_timer_cpu_nb); + if (ret) { + pr_err("Failed to register cpu notifier"); + return ret; + } arc_timer_irq = irq_of_parse_and_map(node, 0); - if (arc_timer_irq <= 0) - panic("clockevent: missing irq"); + if (arc_timer_irq <= 0) { + pr_err("clockevent: missing irq"); + return -EINVAL; + } ret = arc_get_timer_clk(node); - if (ret) - panic("clockevent: missing clk"); + if (ret) { + pr_err("clockevent: 
missing clk"); + return ret; + } evt->irq = arc_timer_irq; evt->cpumask = cpumask_of(smp_processor_id()); @@ -347,24 +355,31 @@ static void __init arc_clockevent_setup(struct device_node *node) /* Needs apriori irq_set_percpu_devid() done in intc map function */ ret = request_percpu_irq(arc_timer_irq, timer_irq_handler, "Timer0 (per-cpu-tick)", evt); - if (ret) - panic("clockevent: unable to request irq\n"); + if (ret) { + pr_err("clockevent: unable to request irq\n"); + return ret; + } enable_percpu_irq(arc_timer_irq, 0); + + return 0; } -static void __init arc_of_timer_init(struct device_node *np) +static int __init arc_of_timer_init(struct device_node *np) { static int init_count = 0; + int ret; if (!init_count) { init_count = 1; - arc_clockevent_setup(np); + ret = arc_clockevent_setup(np); } else { - arc_cs_setup_timer1(np); + ret = arc_cs_setup_timer1(np); } + + return ret; } -CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init); +CLOCKSOURCE_OF_DECLARE_RET(arc_clkevt, "snps,arc-timer", arc_of_timer_init); /* * Called from start_kernel() - boot CPU only -- cgit v1.2.3 From 177cf6e52b0a1a382b9892d3cc9aafd6e7c5943f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:27:44 +0200 Subject: clocksources: Switch back to the clksrc table All the clocksource drivers's init function are now converted to return an error code. CLOCKSOURCE_OF_DECLARE is no longer used as well as the clksrc-of table. Let's convert back the names: - CLOCKSOURCE_OF_DECLARE_RET => CLOCKSOURCE_OF_DECLARE - clksrc-of-ret => clksrc-of Signed-off-by: Daniel Lezcano For exynos_mct and samsung_pwm_timer: Acked-by: Krzysztof Kozlowski For arch/arc: Acked-by: Vineet Gupta For mediatek driver: Acked-by: Matthias Brugger For the Rockchip-part Acked-by: Heiko Stuebner For STi : Acked-by: Patrice Chotard For the mps2-timer.c and versatile.c changes: Acked-by: Liviu Dudau For the OXNAS part : Acked-by: Neil Armstrong For LPC32xx driver: Acked-by: Sylvain Lemieux For Broadcom Kona timer change: Acked-by: Ray Jui For Sun4i and Sun5i: Acked-by: Chen-Yu Tsai For Meson6: Acked-by: Carlo Caione For Keystone: Acked-by: Santosh Shilimkar For NPS: Acked-by: Noam Camus For bcm2835: Acked-by: Eric Anholt --- arch/arc/kernel/time.c | 6 +++--- arch/arm/kernel/smp_twd.c | 6 +++--- arch/microblaze/kernel/timer.c | 2 +- arch/mips/ralink/cevt-rt3352.c | 2 +- arch/nios2/kernel/time.c | 2 +- drivers/clocksource/arm_arch_timer.c | 6 +++--- drivers/clocksource/arm_global_timer.c | 2 +- drivers/clocksource/armv7m_systick.c | 2 +- drivers/clocksource/asm9260_timer.c | 2 +- drivers/clocksource/bcm2835_timer.c | 2 +- drivers/clocksource/bcm_kona_timer.c | 4 ++-- drivers/clocksource/cadence_ttc_timer.c | 2 +- drivers/clocksource/clksrc-dbx500-prcmu.c | 2 +- drivers/clocksource/clksrc-probe.c | 14 -------------- drivers/clocksource/clksrc_st_lpc.c | 2 +- drivers/clocksource/clps711x-timer.c | 2 +- drivers/clocksource/dw_apb_timer_of.c | 8 ++++---- drivers/clocksource/exynos_mct.c | 4 ++-- drivers/clocksource/fsl_ftm_timer.c | 2 +- drivers/clocksource/h8300_timer16.c | 2 +- drivers/clocksource/h8300_timer8.c | 2 +- drivers/clocksource/h8300_tpu.c | 2 +- drivers/clocksource/meson6_timer.c | 2 +- drivers/clocksource/mips-gic-timer.c | 2 +- drivers/clocksource/moxart_timer.c | 2 +- drivers/clocksource/mps2-timer.c | 2 +- drivers/clocksource/mtk_timer.c | 2 +- drivers/clocksource/mxs_timer.c | 2 +- drivers/clocksource/nomadik-mtu.c | 2 +- drivers/clocksource/pxa_timer.c | 2 +- drivers/clocksource/qcom-timer.c | 4 ++-- 
drivers/clocksource/rockchip_timer.c | 8 ++++---- drivers/clocksource/samsung_pwm_timer.c | 8 ++++---- drivers/clocksource/sun4i_timer.c | 2 +- drivers/clocksource/tango_xtal.c | 2 +- drivers/clocksource/tegra20_timer.c | 4 ++-- drivers/clocksource/time-armada-370-xp.c | 6 +++--- drivers/clocksource/time-efm32.c | 4 ++-- drivers/clocksource/time-lpc32xx.c | 2 +- drivers/clocksource/time-orion.c | 2 +- drivers/clocksource/time-pistachio.c | 2 +- drivers/clocksource/timer-atlas7.c | 2 +- drivers/clocksource/timer-atmel-pit.c | 2 +- drivers/clocksource/timer-atmel-st.c | 2 +- drivers/clocksource/timer-digicolor.c | 2 +- drivers/clocksource/timer-imx-gpt.c | 24 ++++++++++++------------ drivers/clocksource/timer-integrator-ap.c | 2 +- drivers/clocksource/timer-keystone.c | 2 +- drivers/clocksource/timer-nps.c | 4 ++-- drivers/clocksource/timer-oxnas-rps.c | 4 ++-- drivers/clocksource/timer-prima2.c | 2 +- drivers/clocksource/timer-sp804.c | 4 ++-- drivers/clocksource/timer-stm32.c | 2 +- drivers/clocksource/timer-sun5i.c | 4 ++-- drivers/clocksource/timer-ti-32k.c | 2 +- drivers/clocksource/timer-u300.c | 2 +- drivers/clocksource/versatile.c | 4 ++-- drivers/clocksource/vf_pit_timer.c | 2 +- drivers/clocksource/vt8500_timer.c | 2 +- drivers/clocksource/zevio-timer.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 -- include/linux/clocksource.h | 5 +---- 62 files changed, 98 insertions(+), 117 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 09de669f4ff0..98f22d2eb563 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -130,7 +130,7 @@ static int __init arc_cs_setup_gfrc(struct device_node *node) return clocksource_register_hz(&arc_counter_gfrc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE_RET(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); +CLOCKSOURCE_OF_DECLARE(arc_gfrc, "snps,archs-timer-gfrc", arc_cs_setup_gfrc); #endif @@ -192,7 +192,7 @@ static int __init arc_cs_setup_rtc(struct device_node *node) return clocksource_register_hz(&arc_counter_rtc, arc_timer_freq); } -CLOCKSOURCE_OF_DECLARE_RET(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); +CLOCKSOURCE_OF_DECLARE(arc_rtc, "snps,archs-timer-rtc", arc_cs_setup_rtc); #endif @@ -379,7 +379,7 @@ static int __init arc_of_timer_init(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(arc_clkevt, "snps,arc-timer", arc_of_timer_init); +CLOCKSOURCE_OF_DECLARE(arc_clkevt, "snps,arc-timer", arc_of_timer_init); /* * Called from start_kernel() - boot CPU only diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 2b24be41d9cc..b6ec65e68009 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -412,7 +412,7 @@ out: WARN(err, "twd_local_timer_of_register failed (%d)\n", err); return err; } -CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE_RET(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); -CLOCKSOURCE_OF_DECLARE_RET(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); #endif diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 7f35e7b50f1b..5bbf38b916ef 100644 --- a/arch/microblaze/kernel/timer.c +++ 
b/arch/microblaze/kernel/timer.c @@ -332,5 +332,5 @@ static int __init xilinx_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(xilinx_timer, "xlnx,xps-timer-1.00.a", +CLOCKSOURCE_OF_DECLARE(xilinx_timer, "xlnx,xps-timer-1.00.a", xilinx_timer_init); diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c index f2d3c7908626..f24eee04e16a 100644 --- a/arch/mips/ralink/cevt-rt3352.c +++ b/arch/mips/ralink/cevt-rt3352.c @@ -150,4 +150,4 @@ static int __init ralink_systick_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(systick, "ralink,cevt-systick", ralink_systick_init); +CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init); diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index b75e40e17963..d9563ddb337e 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -352,4 +352,4 @@ void __init time_init(void) clocksource_probe(); } -CLOCKSOURCE_OF_DECLARE_RET(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); +CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index d0cda68e2c41..9e33309ad2ea 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -784,8 +784,8 @@ static int __init arch_timer_of_init(struct device_node *np) return arch_timer_init(); } -CLOCKSOURCE_OF_DECLARE_RET(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); -CLOCKSOURCE_OF_DECLARE_RET(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); +CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init); +CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init); static int __init arch_timer_mem_init(struct device_node *np) { @@ -868,7 +868,7 @@ out: of_node_put(best_frame); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(armv7_arch_timer_mem, "arm,armv7-timer-mem", +CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem", arch_timer_mem_init); #ifdef CONFIG_ACPI diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c index 40104fc93f24..2a9ceb6e93f9 100644 --- a/drivers/clocksource/arm_global_timer.c +++ b/drivers/clocksource/arm_global_timer.c @@ -358,5 +358,5 @@ out_unmap: } /* Only tested on r2p2 and r3p0 */ -CLOCKSOURCE_OF_DECLARE_RET(arm_gt, "arm,cortex-a9-global-timer", +CLOCKSOURCE_OF_DECLARE(arm_gt, "arm,cortex-a9-global-timer", global_timer_of_register); diff --git a/drivers/clocksource/armv7m_systick.c b/drivers/clocksource/armv7m_systick.c index 2b55410edaf6..e93af1f6a36c 100644 --- a/drivers/clocksource/armv7m_systick.c +++ b/drivers/clocksource/armv7m_systick.c @@ -81,5 +81,5 @@ out_unmap: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(arm_systick, "arm,armv7m-systick", +CLOCKSOURCE_OF_DECLARE(arm_systick, "arm,armv7m-systick", system_timer_of_register); diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index d113c0275d06..1ba871b7fe11 100644 --- a/drivers/clocksource/asm9260_timer.c +++ b/drivers/clocksource/asm9260_timer.c @@ -238,5 +238,5 @@ static int __init asm9260_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(asm9260_timer, "alphascale,asm9260-timer", +CLOCKSOURCE_OF_DECLARE(asm9260_timer, "alphascale,asm9260-timer", asm9260_timer_init); diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 2dcf896b5381..e71acf231c89 100644 --- 
a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -142,5 +142,5 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(bcm2835, "brcm,bcm2835-system-timer", +CLOCKSOURCE_OF_DECLARE(bcm2835, "brcm,bcm2835-system-timer", bcm2835_timer_init); diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index c251aa68993f..86f87be3d80f 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -195,9 +195,9 @@ static int __init kona_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(brcm_kona, "brcm,kona-timer", kona_timer_init); +CLOCKSOURCE_OF_DECLARE(brcm_kona, "brcm,kona-timer", kona_timer_init); /* * bcm,kona-timer is deprecated by brcm,kona-timer * being kept here for driver compatibility */ -CLOCKSOURCE_OF_DECLARE_RET(bcm_kona, "bcm,kona-timer", kona_timer_init); +CLOCKSOURCE_OF_DECLARE(bcm_kona, "bcm,kona-timer", kona_timer_init); diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index e2e76311dde2..388a77bdc39a 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -539,4 +539,4 @@ static int __init ttc_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(ttc, "cdns,ttc", ttc_timer_init); +CLOCKSOURCE_OF_DECLARE(ttc, "cdns,ttc", ttc_timer_init); diff --git a/drivers/clocksource/clksrc-dbx500-prcmu.c b/drivers/clocksource/clksrc-dbx500-prcmu.c index 5a59d2953388..77a365f573d7 100644 --- a/drivers/clocksource/clksrc-dbx500-prcmu.c +++ b/drivers/clocksource/clksrc-dbx500-prcmu.c @@ -86,5 +86,5 @@ static int __init clksrc_dbx500_prcmu_init(struct device_node *node) #endif return clocksource_register_hz(&clocksource_dbx500_prcmu, RATE_32K); } -CLOCKSOURCE_OF_DECLARE_RET(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4", +CLOCKSOURCE_OF_DECLARE(dbx500_prcmu, "stericsson,db8500-prcmu-timer-4", clksrc_dbx500_prcmu_init); diff --git a/drivers/clocksource/clksrc-probe.c b/drivers/clocksource/clksrc-probe.c index 5fa6a555b35c..bc62be97f0a8 100644 --- a/drivers/clocksource/clksrc-probe.c +++ b/drivers/clocksource/clksrc-probe.c @@ -20,19 +20,14 @@ #include extern struct of_device_id __clksrc_of_table[]; -extern struct of_device_id __clksrc_ret_of_table[]; static const struct of_device_id __clksrc_of_table_sentinel __used __section(__clksrc_of_table_end); -static const struct of_device_id __clksrc_ret_of_table_sentinel - __used __section(__clksrc_ret_of_table_end); - void __init clocksource_probe(void) { struct device_node *np; const struct of_device_id *match; - of_init_fn_1 init_func; of_init_fn_1_ret init_func_ret; unsigned clocksources = 0; int ret; @@ -41,15 +36,6 @@ void __init clocksource_probe(void) if (!of_device_is_available(np)) continue; - init_func = match->data; - init_func(np); - clocksources++; - } - - for_each_matching_node_and_match(np, __clksrc_ret_of_table, &match) { - if (!of_device_is_available(np)) - continue; - init_func_ret = match->data; ret = init_func_ret(np); diff --git a/drivers/clocksource/clksrc_st_lpc.c b/drivers/clocksource/clksrc_st_lpc.c index c9022a9eb593..03cc49217bb4 100644 --- a/drivers/clocksource/clksrc_st_lpc.c +++ b/drivers/clocksource/clksrc_st_lpc.c @@ -132,4 +132,4 @@ static int __init st_clksrc_of_register(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(ddata, "st,stih407-lpc", st_clksrc_of_register); +CLOCKSOURCE_OF_DECLARE(ddata, 
"st,stih407-lpc", st_clksrc_of_register); diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c index 3b6619808442..84aed78261e4 100644 --- a/drivers/clocksource/clps711x-timer.c +++ b/drivers/clocksource/clps711x-timer.c @@ -119,5 +119,5 @@ static int __init clps711x_timer_init(struct device_node *np) return -EINVAL; } } -CLOCKSOURCE_OF_DECLARE_RET(clps711x, "cirrus,clps711x-timer", clps711x_timer_init); +CLOCKSOURCE_OF_DECLARE(clps711x, "cirrus,clps711x-timer", clps711x_timer_init); #endif diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index 4985a2cadad9..aee6c0d39a7c 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -167,7 +167,7 @@ static int __init dw_apb_timer_init(struct device_node *timer) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(apb_timer_sp, "snps,dw-apb-timer-sp", dw_apb_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(apb_timer, "snps,dw-apb-timer", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(pc3x2_timer, "picochip,pc3x2-timer", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(apb_timer_osc, "snps,dw-apb-timer-osc", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(apb_timer_sp, "snps,dw-apb-timer-sp", dw_apb_timer_init); +CLOCKSOURCE_OF_DECLARE(apb_timer, "snps,dw-apb-timer", dw_apb_timer_init); diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index f6caed0c36ae..0d18dd4b3bd2 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -627,5 +627,5 @@ static int __init mct_init_ppi(struct device_node *np) { return mct_init_dt(np, MCT_INT_PPI); } -CLOCKSOURCE_OF_DECLARE_RET(exynos4210, "samsung,exynos4210-mct", mct_init_spi); -CLOCKSOURCE_OF_DECLARE_RET(exynos4412, "samsung,exynos4412-mct", mct_init_ppi); +CLOCKSOURCE_OF_DECLARE(exynos4210, "samsung,exynos4210-mct", mct_init_spi); +CLOCKSOURCE_OF_DECLARE(exynos4412, "samsung,exynos4412-mct", mct_init_ppi); diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c index 9ad4ca32fb2d..738515b89073 100644 --- a/drivers/clocksource/fsl_ftm_timer.c +++ b/drivers/clocksource/fsl_ftm_timer.c @@ -369,4 +369,4 @@ err: kfree(priv); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(flextimer, "fsl,ftm-timer", ftm_timer_init); +CLOCKSOURCE_OF_DECLARE(flextimer, "fsl,ftm-timer", ftm_timer_init); diff --git a/drivers/clocksource/h8300_timer16.c b/drivers/clocksource/h8300_timer16.c index 9d99fc85ffad..07d9d5be9054 100644 --- a/drivers/clocksource/h8300_timer16.c +++ b/drivers/clocksource/h8300_timer16.c @@ -187,5 +187,5 @@ free_clk: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(h8300_16bit, "renesas,16bit-timer", +CLOCKSOURCE_OF_DECLARE(h8300_16bit, "renesas,16bit-timer", h8300_16timer_init); diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index 0292a1907619..546bb180f5a4 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -215,4 +215,4 @@ free_clk: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init); +CLOCKSOURCE_OF_DECLARE(h8300_8bit, "renesas,8bit-timer", h8300_8timer_init); diff --git a/drivers/clocksource/h8300_tpu.c b/drivers/clocksource/h8300_tpu.c index 4faf718b30f3..7bdf1991c847 100644 --- a/drivers/clocksource/h8300_tpu.c +++ b/drivers/clocksource/h8300_tpu.c @@ 
-154,4 +154,4 @@ free_clk: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(h8300_tpu, "renesas,tpu", h8300_tpu_init); +CLOCKSOURCE_OF_DECLARE(h8300_tpu, "renesas,tpu", h8300_tpu_init); diff --git a/drivers/clocksource/meson6_timer.c b/drivers/clocksource/meson6_timer.c index 3a6e78f62b19..52af591a9fc7 100644 --- a/drivers/clocksource/meson6_timer.c +++ b/drivers/clocksource/meson6_timer.c @@ -174,5 +174,5 @@ static int __init meson6_timer_init(struct device_node *node) 1, 0xfffe); return 0; } -CLOCKSOURCE_OF_DECLARE_RET(meson6, "amlogic,meson6-timer", +CLOCKSOURCE_OF_DECLARE(meson6, "amlogic,meson6-timer", meson6_timer_init); diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index b164b8712f39..1572c7a778ab 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -222,5 +222,5 @@ static void __init gic_clocksource_of_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(mips_gic_timer, "mti,gic-timer", +CLOCKSOURCE_OF_DECLARE(mips_gic_timer, "mti,gic-timer", gic_clocksource_of_init); diff --git a/drivers/clocksource/moxart_timer.c b/drivers/clocksource/moxart_timer.c index b9c30cd035bf..841454417acd 100644 --- a/drivers/clocksource/moxart_timer.c +++ b/drivers/clocksource/moxart_timer.c @@ -178,4 +178,4 @@ static int __init moxart_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(moxart, "moxa,moxart-timer", moxart_timer_init); +CLOCKSOURCE_OF_DECLARE(moxart, "moxa,moxart-timer", moxart_timer_init); diff --git a/drivers/clocksource/mps2-timer.c b/drivers/clocksource/mps2-timer.c index c303fa9ba87f..3e4431ed9aa9 100644 --- a/drivers/clocksource/mps2-timer.c +++ b/drivers/clocksource/mps2-timer.c @@ -274,4 +274,4 @@ static int __init mps2_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(mps2_timer, "arm,mps2-timer", mps2_timer_init); +CLOCKSOURCE_OF_DECLARE(mps2_timer, "arm,mps2-timer", mps2_timer_init); diff --git a/drivers/clocksource/mtk_timer.c b/drivers/clocksource/mtk_timer.c index 432a2c0884a9..90659493c59c 100644 --- a/drivers/clocksource/mtk_timer.c +++ b/drivers/clocksource/mtk_timer.c @@ -265,4 +265,4 @@ err_kzalloc: return -EINVAL; } -CLOCKSOURCE_OF_DECLARE_RET(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init); +CLOCKSOURCE_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_timer_init); diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c index 17b9d1991178..630a8d3904bb 100644 --- a/drivers/clocksource/mxs_timer.c +++ b/drivers/clocksource/mxs_timer.c @@ -295,4 +295,4 @@ static int __init mxs_timer_init(struct device_node *np) return setup_irq(irq, &mxs_timer_irq); } -CLOCKSOURCE_OF_DECLARE_RET(mxs, "fsl,timrot", mxs_timer_init); +CLOCKSOURCE_OF_DECLARE(mxs, "fsl,timrot", mxs_timer_init); diff --git a/drivers/clocksource/nomadik-mtu.c b/drivers/clocksource/nomadik-mtu.c index d2be5b3e0318..3c124d1ca600 100644 --- a/drivers/clocksource/nomadik-mtu.c +++ b/drivers/clocksource/nomadik-mtu.c @@ -284,5 +284,5 @@ static int __init nmdk_timer_of_init(struct device_node *node) return nmdk_timer_init(base, irq, pclk, clk); } -CLOCKSOURCE_OF_DECLARE_RET(nomadik_mtu, "st,nomadik-mtu", +CLOCKSOURCE_OF_DECLARE(nomadik_mtu, "st,nomadik-mtu", nmdk_timer_of_init); diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 59af75cc4c74..937e10b84d58 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -213,7 +213,7 @@ static int __init pxa_timer_dt_init(struct 
device_node *np) return pxa_timer_common_init(irq, clk_get_rate(clk)); } -CLOCKSOURCE_OF_DECLARE_RET(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init); +CLOCKSOURCE_OF_DECLARE(pxa_timer, "marvell,pxa-timer", pxa_timer_dt_init); /* * Legacy timer init for non device-tree boards. diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c index 79f73bddc5f4..662576339049 100644 --- a/drivers/clocksource/qcom-timer.c +++ b/drivers/clocksource/qcom-timer.c @@ -273,5 +273,5 @@ static int __init msm_dt_timer_init(struct device_node *np) return msm_timer_init(freq, 32, irq, !!percpu_offset); } -CLOCKSOURCE_OF_DECLARE_RET(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(scss_timer, "qcom,scss-timer", msm_dt_timer_init); +CLOCKSOURCE_OF_DECLARE(kpss_timer, "qcom,kpss-timer", msm_dt_timer_init); +CLOCKSOURCE_OF_DECLARE(scss_timer, "qcom,scss-timer", msm_dt_timer_init); diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c index 85aee6953944..23e267acba25 100644 --- a/drivers/clocksource/rockchip_timer.c +++ b/drivers/clocksource/rockchip_timer.c @@ -205,7 +205,7 @@ static int __init rk3399_timer_init(struct device_node *np) return rk_timer_init(np, TIMER_CONTROL_REG3399); } -CLOCKSOURCE_OF_DECLARE_RET(rk3288_timer, "rockchip,rk3288-timer", - rk3288_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(rk3399_timer, "rockchip,rk3399-timer", - rk3399_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3288_timer, "rockchip,rk3288-timer", + rk3288_timer_init); +CLOCKSOURCE_OF_DECLARE(rk3399_timer, "rockchip,rk3399-timer", + rk3399_timer_init); diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c index 27a9797e8187..54565bd0093b 100644 --- a/drivers/clocksource/samsung_pwm_timer.c +++ b/drivers/clocksource/samsung_pwm_timer.c @@ -466,7 +466,7 @@ static int __init s3c2410_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s3c24xx_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s3c2410_pwm, "samsung,s3c2410-pwm", s3c2410_pwm_clocksource_init); static const struct samsung_pwm_variant s3c64xx_variant = { .bits = 32, @@ -479,7 +479,7 @@ static int __init s3c64xx_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s3c64xx_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s3c6400_pwm, "samsung,s3c6400-pwm", s3c64xx_pwm_clocksource_init); static const struct samsung_pwm_variant s5p64x0_variant = { .bits = 32, @@ -492,7 +492,7 @@ static int __init s5p64x0_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s5p64x0_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s5p6440_pwm, "samsung,s5p6440-pwm", s5p64x0_pwm_clocksource_init); static const struct samsung_pwm_variant s5p_variant = { .bits = 32, @@ -505,5 +505,5 @@ static int __init s5p_pwm_clocksource_init(struct device_node *np) { return samsung_pwm_alloc(np, &s5p_variant); } -CLOCKSOURCE_OF_DECLARE_RET(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init); +CLOCKSOURCE_OF_DECLARE(s5pc100_pwm, "samsung,s5pc100-pwm", s5p_pwm_clocksource_init); #endif diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 445373091fe8..97669ee4df2a 100644 --- a/drivers/clocksource/sun4i_timer.c +++ 
b/drivers/clocksource/sun4i_timer.c @@ -226,5 +226,5 @@ static int __init sun4i_timer_init(struct device_node *node) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(sun4i, "allwinner,sun4i-a10-timer", +CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer", sun4i_timer_init); diff --git a/drivers/clocksource/tango_xtal.c b/drivers/clocksource/tango_xtal.c index 7dc716cc976f..12fcef8cf2d3 100644 --- a/drivers/clocksource/tango_xtal.c +++ b/drivers/clocksource/tango_xtal.c @@ -53,4 +53,4 @@ static int __init tango_clocksource_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(tango, "sigma,tick-counter", tango_clocksource_init); +CLOCKSOURCE_OF_DECLARE(tango, "sigma,tick-counter", tango_clocksource_init); diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index 543c37e3a62c..f960891aa04e 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -237,7 +237,7 @@ static int __init tegra20_init_timer(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer); +CLOCKSOURCE_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer); static int __init tegra20_init_rtc(struct device_node *np) { @@ -261,4 +261,4 @@ static int __init tegra20_init_rtc(struct device_node *np) return register_persistent_clock(NULL, tegra_read_persistent_clock64); } -CLOCKSOURCE_OF_DECLARE_RET(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); +CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index bc4ab48d4048..a4e59239f28e 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -371,7 +371,7 @@ static int __init armada_xp_timer_init(struct device_node *np) return armada_370_xp_timer_common_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(armada_xp, "marvell,armada-xp-timer", +CLOCKSOURCE_OF_DECLARE(armada_xp, "marvell,armada-xp-timer", armada_xp_timer_init); static int __init armada_375_timer_init(struct device_node *np) @@ -409,7 +409,7 @@ static int __init armada_375_timer_init(struct device_node *np) return armada_370_xp_timer_common_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(armada_375, "marvell,armada-375-timer", +CLOCKSOURCE_OF_DECLARE(armada_375, "marvell,armada-375-timer", armada_375_timer_init); static int __init armada_370_timer_init(struct device_node *np) @@ -432,5 +432,5 @@ static int __init armada_370_timer_init(struct device_node *np) return armada_370_xp_timer_common_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(armada_370, "marvell,armada-370-timer", +CLOCKSOURCE_OF_DECLARE(armada_370, "marvell,armada-370-timer", armada_370_timer_init); diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index 6e2f79fd6ad0..5ac344b383e1 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -283,5 +283,5 @@ static int __init efm32_timer_init(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(efm32compat, "efm32,timer", efm32_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(efm32, "energymicro,efm32-timer", efm32_timer_init); +CLOCKSOURCE_OF_DECLARE(efm32compat, "efm32,timer", efm32_timer_init); +CLOCKSOURCE_OF_DECLARE(efm32, "energymicro,efm32-timer", efm32_timer_init); diff --git a/drivers/clocksource/time-lpc32xx.c b/drivers/clocksource/time-lpc32xx.c index cb5b8665ff82..9649cfdb9213 100644 --- a/drivers/clocksource/time-lpc32xx.c +++ 
b/drivers/clocksource/time-lpc32xx.c @@ -311,4 +311,4 @@ static int __init lpc32xx_timer_init(struct device_node *np) return ret; } -CLOCKSOURCE_OF_DECLARE_RET(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init); +CLOCKSOURCE_OF_DECLARE(lpc32xx_timer, "nxp,lpc3220-timer", lpc32xx_timer_init); diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c index 5fdeb5dcc4cf..a28f496e97cf 100644 --- a/drivers/clocksource/time-orion.c +++ b/drivers/clocksource/time-orion.c @@ -167,4 +167,4 @@ static int __init orion_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(orion_timer, "marvell,orion-timer", orion_timer_init); +CLOCKSOURCE_OF_DECLARE(orion_timer, "marvell,orion-timer", orion_timer_init); diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c index adaaec5e9abd..a7d9a08e4b0e 100644 --- a/drivers/clocksource/time-pistachio.c +++ b/drivers/clocksource/time-pistachio.c @@ -214,5 +214,5 @@ static int __init pistachio_clksrc_of_init(struct device_node *node) sched_clock_register(pistachio_read_sched_clock, 32, rate); return clocksource_register_hz(&pcs_gpt.cs, rate); } -CLOCKSOURCE_OF_DECLARE_RET(pistachio_gptimer, "img,pistachio-gptimer", +CLOCKSOURCE_OF_DECLARE(pistachio_gptimer, "img,pistachio-gptimer", pistachio_clksrc_of_init); diff --git a/drivers/clocksource/timer-atlas7.c b/drivers/clocksource/timer-atlas7.c index 7b1a0071a211..90f8fbc154a4 100644 --- a/drivers/clocksource/timer-atlas7.c +++ b/drivers/clocksource/timer-atlas7.c @@ -304,4 +304,4 @@ static int __init sirfsoc_of_timer_init(struct device_node *np) return sirfsoc_atlas7_timer_init(np); } -CLOCKSOURCE_OF_DECLARE_RET(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init); +CLOCKSOURCE_OF_DECLARE(sirfsoc_atlas7_timer, "sirf,atlas7-tick", sirfsoc_of_timer_init); diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index ffaca7c2c996..1ffac0cb0cb7 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -270,5 +270,5 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) return at91sam926x_pit_common_init(data); } -CLOCKSOURCE_OF_DECLARE_RET(at91sam926x_pit, "atmel,at91sam9260-pit", +CLOCKSOURCE_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit", at91sam926x_pit_dt_init); diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c index e9331d36965b..e90ab5b63a90 100644 --- a/drivers/clocksource/timer-atmel-st.c +++ b/drivers/clocksource/timer-atmel-st.c @@ -260,5 +260,5 @@ static int __init atmel_st_timer_init(struct device_node *node) /* register clocksource */ return clocksource_register_hz(&clk32k, sclk_rate); } -CLOCKSOURCE_OF_DECLARE_RET(atmel_st_timer, "atmel,at91rm9200-st", +CLOCKSOURCE_OF_DECLARE(atmel_st_timer, "atmel,at91rm9200-st", atmel_st_timer_init); diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index b929061ebe56..10318cc99c0e 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -202,5 +202,5 @@ static int __init digicolor_timer_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(conexant_digicolor, "cnxt,cx92755-timer", +CLOCKSOURCE_OF_DECLARE(conexant_digicolor, "cnxt,cx92755-timer", digicolor_timer_init); diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index d5640a747078..f595460bfc58 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ 
b/drivers/clocksource/timer-imx-gpt.c @@ -545,15 +545,15 @@ static int __init imx6dl_timer_init_dt(struct device_node *np) return mxc_timer_init_dt(np, GPT_TYPE_IMX6DL); } -CLOCKSOURCE_OF_DECLARE_RET(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt); -CLOCKSOURCE_OF_DECLARE_RET(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx51_timer, "fsl,imx51-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx53_timer, "fsl,imx53-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6q_timer, "fsl,imx6q-gpt", imx31_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6dl_timer, "fsl,imx6dl-gpt", imx6dl_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6sl_timer, "fsl,imx6sl-gpt", imx6dl_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx6sx_timer, "fsl,imx6sx-gpt", imx6dl_timer_init_dt); diff --git a/drivers/clocksource/timer-integrator-ap.c b/drivers/clocksource/timer-integrator-ap.c index 675faceab5d6..df6e672afc04 100644 --- a/drivers/clocksource/timer-integrator-ap.c +++ b/drivers/clocksource/timer-integrator-ap.c @@ -232,5 +232,5 @@ static int __init integrator_ap_timer_init_of(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(integrator_ap_timer, "arm,integrator-timer", +CLOCKSOURCE_OF_DECLARE(integrator_ap_timer, "arm,integrator-timer", integrator_ap_timer_init_of); diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c index 4199823b2ec9..ab68a47ab3b4 100644 --- a/drivers/clocksource/timer-keystone.c +++ b/drivers/clocksource/timer-keystone.c @@ -226,5 +226,5 @@ err: return error; } -CLOCKSOURCE_OF_DECLARE_RET(keystone_timer, "ti,keystone-timer", +CLOCKSOURCE_OF_DECLARE(keystone_timer, "ti,keystone-timer", keystone_timer_init); diff --git a/drivers/clocksource/timer-nps.c b/drivers/clocksource/timer-nps.c index b5c7b2bd77bd..70c149af8ee0 100644 --- a/drivers/clocksource/timer-nps.c +++ b/drivers/clocksource/timer-nps.c @@ -96,5 +96,5 @@ static int __init nps_timer_init(struct device_node *node) return nps_setup_clocksource(node, clk); } -CLOCKSOURCE_OF_DECLARE_RET(ezchip_nps400_clksrc, "ezchip,nps400-timer", - nps_timer_init); +CLOCKSOURCE_OF_DECLARE(ezchip_nps400_clksrc, "ezchip,nps400-timer", + nps_timer_init); diff --git a/drivers/clocksource/timer-oxnas-rps.c 
b/drivers/clocksource/timer-oxnas-rps.c index 0d99f40ee757..bd887e2a8cf8 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -293,5 +293,5 @@ err_alloc: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(ox810se_rps, - "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); +CLOCKSOURCE_OF_DECLARE(ox810se_rps, + "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init); diff --git a/drivers/clocksource/timer-prima2.c b/drivers/clocksource/timer-prima2.c index 7b1084d0b45e..dae8a66301d7 100644 --- a/drivers/clocksource/timer-prima2.c +++ b/drivers/clocksource/timer-prima2.c @@ -246,5 +246,5 @@ static int __init sirfsoc_prima2_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(sirfsoc_prima2_timer, +CLOCKSOURCE_OF_DECLARE(sirfsoc_prima2_timer, "sirf,prima2-tick", sirfsoc_prima2_timer_init); diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 3dc47efc9298..d07863388e05 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -287,7 +287,7 @@ err: iounmap(base); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(sp804, "arm,sp804", sp804_of_init); +CLOCKSOURCE_OF_DECLARE(sp804, "arm,sp804", sp804_of_init); static int __init integrator_cp_of_init(struct device_node *np) { @@ -335,4 +335,4 @@ err: iounmap(base); return ret; } -CLOCKSOURCE_OF_DECLARE_RET(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); +CLOCKSOURCE_OF_DECLARE(intcp, "arm,integrator-cp-timer", integrator_cp_of_init); diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index d5bf352905c8..1b2574c4fb97 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -187,4 +187,4 @@ err_clk_get: return ret; } -CLOCKSOURCE_OF_DECLARE_RET(stm32, "st,stm32-timer", stm32_clockevent_init); +CLOCKSOURCE_OF_DECLARE(stm32, "st,stm32-timer", stm32_clockevent_init); diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index f0a3ffbab431..c184eb84101e 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -346,7 +346,7 @@ static int __init sun5i_timer_init(struct device_node *node) return sun5i_setup_clockevent(node, timer_base, clk, irq); } -CLOCKSOURCE_OF_DECLARE_RET(sun5i_a13, "allwinner,sun5i-a13-hstimer", +CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer", sun5i_timer_init); -CLOCKSOURCE_OF_DECLARE_RET(sun7i_a20, "allwinner,sun7i-a20-hstimer", +CLOCKSOURCE_OF_DECLARE(sun7i_a20, "allwinner,sun7i-a20-hstimer", sun5i_timer_init); diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c index e4ad3c6e03f9..92b7e390f6c8 100644 --- a/drivers/clocksource/timer-ti-32k.c +++ b/drivers/clocksource/timer-ti-32k.c @@ -124,5 +124,5 @@ static int __init ti_32k_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(ti_32k_timer, "ti,omap-counter32k", +CLOCKSOURCE_OF_DECLARE(ti_32k_timer, "ti,omap-counter32k", ti_32k_timer_init); diff --git a/drivers/clocksource/timer-u300.c b/drivers/clocksource/timer-u300.c index a6a0dec41faa..704e40c6f151 100644 --- a/drivers/clocksource/timer-u300.c +++ b/drivers/clocksource/timer-u300.c @@ -458,5 +458,5 @@ static int __init u300_timer_init_of(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(u300_timer, "stericsson,u300-apptimer", +CLOCKSOURCE_OF_DECLARE(u300_timer, "stericsson,u300-apptimer", u300_timer_init_of); diff --git a/drivers/clocksource/versatile.c b/drivers/clocksource/versatile.c index 
8daeffac300b..220b490a8142 100644 --- a/drivers/clocksource/versatile.c +++ b/drivers/clocksource/versatile.c @@ -38,7 +38,7 @@ static int __init versatile_sched_clock_init(struct device_node *node) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(vexpress, "arm,vexpress-sysreg", +CLOCKSOURCE_OF_DECLARE(vexpress, "arm,vexpress-sysreg", versatile_sched_clock_init); -CLOCKSOURCE_OF_DECLARE_RET(versatile, "arm,versatile-sysreg", +CLOCKSOURCE_OF_DECLARE(versatile, "arm,versatile-sysreg", versatile_sched_clock_init); diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c index ca4dff4d5684..55d8d8402d90 100644 --- a/drivers/clocksource/vf_pit_timer.c +++ b/drivers/clocksource/vf_pit_timer.c @@ -201,4 +201,4 @@ static int __init pit_timer_init(struct device_node *np) return pit_clockevent_init(clk_rate, irq); } -CLOCKSOURCE_OF_DECLARE_RET(vf610, "fsl,vf610-pit", pit_timer_init); +CLOCKSOURCE_OF_DECLARE(vf610, "fsl,vf610-pit", pit_timer_init); diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index 1bc8707b4f9d..b15069483fbd 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -165,4 +165,4 @@ static int __init vt8500_timer_init(struct device_node *np) return 0; } -CLOCKSOURCE_OF_DECLARE_RET(vt8500, "via,vt8500-timer", vt8500_timer_init); +CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); diff --git a/drivers/clocksource/zevio-timer.c b/drivers/clocksource/zevio-timer.c index cb4cf056d56d..9a53f5ef6157 100644 --- a/drivers/clocksource/zevio-timer.c +++ b/drivers/clocksource/zevio-timer.c @@ -215,4 +215,4 @@ static int __init zevio_timer_init(struct device_node *node) return zevio_timer_add(node); } -CLOCKSOURCE_OF_DECLARE_RET(zevio_timer, "lsi,zevio-timer", zevio_timer_init); +CLOCKSOURCE_OF_DECLARE(zevio_timer, "lsi,zevio-timer", zevio_timer_init); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8c6c626285c0..6a67ab94b553 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -173,7 +173,6 @@ *(__##name##_of_table_end) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) -#define CLKSRC_RET_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc_ret) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) #define CLK_OF_TABLES() OF_TABLE(CONFIG_COMMON_CLK, clk) #define IOMMU_OF_TABLES() OF_TABLE(CONFIG_OF_IOMMU, iommu) @@ -532,7 +531,6 @@ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ - CLKSRC_RET_OF_TABLES() \ IOMMU_OF_TABLES() \ CPU_METHOD_OF_TABLES() \ CPUIDLE_METHOD_OF_TABLES() \ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 15c3839850f4..08398182f56e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -244,10 +244,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ - OF_DECLARE_1(clksrc, name, compat, fn) - -#define CLOCKSOURCE_OF_DECLARE_RET(name, compat, fn) \ - OF_DECLARE_1_RET(clksrc_ret, name, compat, fn) + OF_DECLARE_1_RET(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE extern void clocksource_probe(void); -- cgit v1.2.3 From 2ea879a7cf544e05c7cd0bfd569857ec2a8a75a9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 18:35:38 +0200 Subject: clocksource/drivers/bcm2835: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. 
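The shape of the change, shared by this and the following clocksource COMPILE_TEST commits, is roughly the sketch below (FOO_TIMER and ARCH_FOO are placeholders; the real option names and help texts appear in the hunks that follow):

    config FOO_TIMER
            bool "Foo timer driver" if COMPILE_TEST
            depends on GENERIC_CLOCKEVENTS
            select CLKSRC_MMIO
            help
              Enables support for the Foo timer.

    # in the platform's Kconfig
    config ARCH_FOO
            ...
            select FOO_TIMER

The driver's Makefile rule is then keyed off CONFIG_FOO_TIMER rather than the platform symbol, so the timer driver can still be built for compile testing without the platform selected.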
A new Kconfig option is added, CONFIG_BCM2835_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-bcm/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 68ab6412392a..1284ce1d03e0 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -143,6 +143,7 @@ config ARCH_BCM2835 select ARM_TIMER_SP804 select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7 select CLKSRC_OF + select BCM2835_TIMER select PINCTRL select PINCTRL_BCM2835 help diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index d4b9e04ee1a4..0275b166a598 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -27,6 +27,13 @@ config CLKBLD_I8253 config CLKSRC_MMIO bool +config BCM2835_TIMER + bool "BCM2835 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the BCM2835 timer driver. + config DIGICOLOR_TIMER bool "Digicolor timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index bc66981c8cc7..008d8a0da752 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -19,7 +19,7 @@ obj-$(CONFIG_CLKSRC_NOMADIK_MTU) += nomadik-mtu.o obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o -obj-$(CONFIG_ARCH_BCM2835) += bcm2835_timer.o +obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_ARCH_CLPS711X) += clps711x-timer.o obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o -- cgit v1.2.3 From 1cad71e35f88bd41b954e9984c7d2a8ce3924db0 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 19:20:36 +0200 Subject: clocksource/drivers/bcm_kona: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_BCM_KONA_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-bcm/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 1284ce1d03e0..4f1709b31822 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -89,6 +89,7 @@ config ARCH_BCM_MOBILE select HAVE_ARM_ARCH_TIMER select PINCTRL select ARCH_BCM_MOBILE_SMP if SMP + select BCM_KONA_TIMER help This enables support for systems based on Broadcom mobile SoCs. diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 01ecd8833ed1..a3cccf568a9f 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -34,6 +34,13 @@ config BCM2835_TIMER help Enables the support for the BCM2835 timer driver. 
+config BCM_KONA_TIMER + bool "BCM mobile timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the BCM Kona mobile timer driver. + config DIGICOLOR_TIMER bool "Digicolor timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 008d8a0da752..df99c9279b6f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o obj-$(CONFIG_TEGRA_TIMER) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o -obj-$(CONFIG_ARCH_BCM_MOBILE) += bcm_kona_timer.o +obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o obj-$(CONFIG_CLKSRC_STM32) += timer-stm32.o -- cgit v1.2.3 From ecf0efdc98cd46886fa119492f052ab3eaba9ddf Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 20:06:54 +0200 Subject: clocksource/drivers/clps_711x: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_CLPS711X_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/Kconfig | 2 +- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 90542db1220d..f0636ec94903 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -358,10 +358,10 @@ config ARCH_CLPS711X bool "Cirrus Logic CLPS711x/EP721x/EP731x-based" select ARCH_REQUIRE_GPIOLIB select AUTO_ZRELADDR - select CLKSRC_MMIO select COMMON_CLK select CPU_ARM720T select GENERIC_CLOCKEVENTS + select CLPS711X_TIMER select MFD_SYSCON select SOC_BUS help diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index a3cccf568a9f..f41eef2fafde 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -155,6 +155,13 @@ config CLKSRC_DBX500_PRCMU help Use the always on PRCMU Timer as clocksource +config CLPS711X_TIMER + bool "Cirrus logic timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Cirrus Logic PS711 timer. 
+ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index df99c9279b6f..25e599c4eeb7 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o -obj-$(CONFIG_ARCH_CLPS711X) += clps711x-timer.o +obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o -- cgit v1.2.3 From b56d5d218499404b2abfd6f33a6d480312bf8a92 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 13:11:39 +0200 Subject: clocksource/drivers/atlas7: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_ATLAS7_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-prima2/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 0cf4426183cf..2db8d24ab67a 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -28,6 +28,7 @@ config ARCH_ATLAS7 default y select ARM_GIC select CPU_V7 + select ATLAS7_TIMER select HAVE_ARM_SCU if SMP select HAVE_SMP help diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index f41eef2fafde..669dd3cd0291 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -162,6 +162,13 @@ config CLPS711X_TIMER help Enables support for the Cirrus Logic PS711 timer. +config ATLAS7_TIMER + bool "Atlas7 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Atlas7 timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 25e599c4eeb7..7abf144ac6cc 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_ARMADA_370_XP_TIMER) += time-armada-370-xp.o obj-$(CONFIG_ORION_TIMER) += time-orion.o obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o -obj-$(CONFIG_ARCH_ATLAS7) += timer-atlas7.o +obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o -- cgit v1.2.3 From 419be9e36cf2349f15d0b7280ba46be6f9da7a61 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 13:29:03 +0200 Subject: clocksource/drivers/moxart: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_MOXART_TIMER and is selected by the platform. 
Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-moxart/Kconfig | 2 +- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-moxart/Kconfig b/arch/arm/mach-moxart/Kconfig index 180d9d216719..ddc79cea32d3 100644 --- a/arch/arm/mach-moxart/Kconfig +++ b/arch/arm/mach-moxart/Kconfig @@ -3,7 +3,7 @@ menuconfig ARCH_MOXART depends on ARCH_MULTI_V4 select CPU_FA526 select ARM_DMA_MEM_BUFFERABLE - select CLKSRC_MMIO + select MOXART_TIMER select GENERIC_IRQ_CHIP select ARCH_REQUIRE_GPIOLIB select PHYLIB if NETDEVICES diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 669dd3cd0291..db2c5ff6957e 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -169,6 +169,13 @@ config ATLAS7_TIMER help Enables support for the Atlas7 timer. +config MOXART_TIMER + bool "Moxart timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Moxart timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 7abf144ac6cc..e64e37a5f8a0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_ORION_TIMER) += time-orion.o obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o -obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o +obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o -- cgit v1.2.3 From d81c50a0360f8a186150b9bb572d5e0514c25ce9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 13:36:18 +0200 Subject: clocksource/drivers/mxs: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_MXS_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-mxs/Kconfig | 2 +- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mxs/Kconfig b/arch/arm/mach-mxs/Kconfig index 84794137b175..68a3a9ec605d 100644 --- a/arch/arm/mach-mxs/Kconfig +++ b/arch/arm/mach-mxs/Kconfig @@ -16,7 +16,7 @@ config ARCH_MXS bool "Freescale MXS (i.MX23, i.MX28) support" depends on ARCH_MULTI_V5 select ARCH_REQUIRE_GPIOLIB - select CLKSRC_MMIO + select MXS_TIMER select PINCTRL select SOC_BUS select SOC_IMX23 diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index db2c5ff6957e..5d70cdfb8f63 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -176,6 +176,14 @@ config MOXART_TIMER help Enables support for the Moxart timer. 
+config MXS_TIMER + bool "Mxs timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + select STMP_DEVICE + help + Enables support for the Mxs timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index e64e37a5f8a0..1360bbab05e0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_BCM2835_TIMER) += bcm2835_timer.o obj-$(CONFIG_CLPS711X_TIMER) += clps711x-timer.o obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o -obj-$(CONFIG_ARCH_MXS) += mxs_timer.o +obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o obj-$(CONFIG_ARCH_U300) += timer-u300.o -- cgit v1.2.3 From f3550d499576c423db0d902930cf8d848fe09744 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 14:28:38 +0200 Subject: clocksource/drivers/prima2: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_PRIMA2_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-prima2/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index 2db8d24ab67a..9e938f2961cf 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -39,6 +39,7 @@ config ARCH_PRIMA2 default y select SIRF_IRQ select ZONE_DMA + select PRIMA2_TIMER help Support for CSR SiRFSoC ARM Cortex A9 Platform diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 5d70cdfb8f63..c350fbd805d7 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -184,6 +184,13 @@ config MXS_TIMER help Enables support for the Mxs timer. +config PRIMA2_TIMER + bool "Prima2 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Prima2 timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 1360bbab05e0..b419d5da11c7 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_ATLAS7_TIMER) += timer-atlas7.o obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o -obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o +obj-$(CONFIG_PRIMA2_TIMER) += timer-prima2.o obj-$(CONFIG_ARCH_U300) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o -- cgit v1.2.3 From 85f98db4adbbd3ec6b41537d31241b59bf47c66f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 14:31:16 +0200 Subject: clocksource/drivers/u300: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_U300_TIMER and is selected by the platform. 
Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Due on the delay specific code, this driver will compile only on the ARM architecture. Signed-off-by: Daniel Lezcano --- arch/arm/mach-u300/Kconfig | 2 +- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig index 301a98498453..4fdc3425ffbd 100644 --- a/arch/arm/mach-u300/Kconfig +++ b/arch/arm/mach-u300/Kconfig @@ -4,7 +4,7 @@ menuconfig ARCH_U300 select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select ARM_VIC - select CLKSRC_MMIO + select U300_TIMER select CPU_ARM926T select HAVE_TCM select PINCTRL diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index c350fbd805d7..d425f80b1e42 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -191,6 +191,14 @@ config PRIMA2_TIMER help Enables support for the Prima2 timer. +config U300_TIMER + bool "U300 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on ARM + select CLKSRC_MMIO + help + Enables support for the U300 timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index b419d5da11c7..adbc3a8082d0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_MOXART_TIMER) += moxart_timer.o obj-$(CONFIG_MXS_TIMER) += mxs_timer.o obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_PRIMA2_TIMER) += timer-prima2.o -obj-$(CONFIG_ARCH_U300) += timer-u300.o +obj-$(CONFIG_U300_TIMER) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o -- cgit v1.2.3 From d683b9dcc8a8d743f7b660ff3b77ccfbe652e4b9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 15:03:21 +0200 Subject: clocksource/drivers/nspire: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_NSPIRE_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-nspire/Kconfig | 1 + drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-nspire/Kconfig b/arch/arm/mach-nspire/Kconfig index bc41f26c1a12..d4985305cab2 100644 --- a/arch/arm/mach-nspire/Kconfig +++ b/arch/arm/mach-nspire/Kconfig @@ -7,5 +7,6 @@ config ARCH_NSPIRE select ARM_AMBA select ARM_VIC select ARM_TIMER_SP804 + select NSPIRE_TIMER help This enables support for systems using the TI-NSPIRE CPU diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index d425f80b1e42..0cfc4bf1c8ac 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -199,6 +199,13 @@ config U300_TIMER help Enables support for the U300 timer. 
+config NSPIRE_TIMER + bool "NSpire timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Nspire timer. + config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index adbc3a8082d0..d888c986cc43 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_MESON6_TIMER) += meson6_timer.o obj-$(CONFIG_TEGRA_TIMER) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o -obj-$(CONFIG_ARCH_NSPIRE) += zevio-timer.o +obj-$(CONFIG_NSPIRE_TIMER) += zevio-timer.o obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o obj-$(CONFIG_CADENCE_TTC_TIMER) += cadence_ttc_timer.o obj-$(CONFIG_CLKSRC_EFM32) += time-efm32.o -- cgit v1.2.3 From c12547a00dfd3aaacfd5ce362ee4b9585c320054 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 15:05:05 +0200 Subject: clocksource/drivers/keystone: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_KEYSTONE_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-keystone/Kconfig | 2 +- drivers/clocksource/Kconfig | 8 ++++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-keystone/Kconfig b/arch/arm/mach-keystone/Kconfig index ea955f6db8b7..bac577badc7e 100644 --- a/arch/arm/mach-keystone/Kconfig +++ b/arch/arm/mach-keystone/Kconfig @@ -4,7 +4,7 @@ config ARCH_KEYSTONE depends on ARM_PATCH_PHYS_VIRT select ARM_GIC select HAVE_ARM_ARCH_TIMER - select CLKSRC_MMIO + select KEYSTONE_TIMER select ARM_ERRATA_798181 if SMP select COMMON_CLK_KEYSTONE select ARCH_SUPPORTS_BIG_ENDIAN diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 0cfc4bf1c8ac..7e5709a1e4f0 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -206,6 +206,14 @@ config NSPIRE_TIMER help Enables support for the Nspire timer. +config KEYSTONE_TIMER + bool "Keystone timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on ARM || ARM64 + select CLKSRC_MMIO + help + Enables support for the Keystone timer. 
+ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index d888c986cc43..a8184319c41f 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -56,7 +56,7 @@ obj-$(CONFIG_ARMV7M_SYSTICK) += armv7m_systick.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o -obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o +obj-$(CONFIG_KEYSTONE_TIMER) += timer-keystone.o obj-$(CONFIG_ARCH_INTEGRATOR_AP) += timer-integrator-ap.o obj-$(CONFIG_CLKSRC_VERSATILE) += versatile.o obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o -- cgit v1.2.3 From 568c0342e494fa4c05377c6c83c653afa350985a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 3 Jun 2016 15:11:21 +0200 Subject: clocksource/drivers/integrator-ap: Add the COMPILE_TEST option Change the Kconfig option logic to fullfil with the current approach. A new Kconfig option is added, CONFIG_INTEGRATOR_AP_TIMER and is selected by the platform. Then the clocksource's Kconfig is changed to make this option selectable by the user if the COMPILE_TEST option is set. Otherwise, it is up to the platform's Kconfig to select the timer. Signed-off-by: Daniel Lezcano --- arch/arm/mach-integrator/Kconfig | 2 +- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index b2a85ba13f08..291262e5aeaf 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -20,7 +20,7 @@ if ARCH_INTEGRATOR config ARCH_INTEGRATOR_AP bool "Support Integrator/AP and Integrator/PP2 platforms" - select CLKSRC_MMIO + select INTEGRATOR_AP_TIMER select MIGHT_HAVE_PCI select SERIAL_AMBA_PL010 if TTY select SERIAL_AMBA_PL010_CONSOLE if TTY diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 7e5709a1e4f0..8055b37347ff 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -214,6 +214,13 @@ config KEYSTONE_TIMER help Enables support for the Keystone timer. +config INTEGRATOR_AP_TIMER + bool "Integrator-ap timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables support for the Integrator-ap timer. 
+ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK bool "Clocksource PRCMU Timer sched_clock" depends on (CLKSRC_DBX500_PRCMU && !CLKSRC_NOMADIK_MTU_SCHED_CLOCK) diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index a8184319c41f..fd9d6df0bbc0 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp804.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o obj-$(CONFIG_KEYSTONE_TIMER) += timer-keystone.o -obj-$(CONFIG_ARCH_INTEGRATOR_AP) += timer-integrator-ap.o +obj-$(CONFIG_INTEGRATOR_AP_TIMER) += timer-integrator-ap.o obj-$(CONFIG_CLKSRC_VERSATILE) += versatile.o obj-$(CONFIG_CLKSRC_MIPS_GIC) += mips-gic-timer.o obj-$(CONFIG_CLKSRC_TANGO_XTAL) += tango_xtal.o -- cgit v1.2.3 From eee25ab19de632af1ab4d2ac50bfc5006802e664 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Jun 2016 22:11:14 +0200 Subject: ARM: dts: sun7i: Fix pll3x2 and pll7x2 not having a parent clock Fix pll3x2 and pll7x2 not having a parent clock, specifically this fixes the kernel turning of pll3 while simplefb is using it when uboot has configured things to use pll3x2 as lcd ch clk parent. Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun7i-a20.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index f480051c1f8a..2c34bbbb9570 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -232,6 +232,7 @@ pll3x2: pll3x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll3>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll3-2x"; @@ -273,6 +274,7 @@ pll7x2: pll7x2_clk { #clock-cells = <0>; compatible = "fixed-factor-clock"; + clocks = <&pll7>; clock-div = <1>; clock-mult = <2>; clock-output-names = "pll7-2x"; -- cgit v1.2.3 From b44439e42912b9dcc510a0ff891809ea2cadc46b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Jun 2016 18:08:22 +0200 Subject: ARM: mvebu: compile pm code conditionally A cleanup to include the headers correctly caused another build problem: arch/arm/mach-mvebu/kirkwood-pm.c:70:13: error: redefinition of 'kirkwood_pm_init' arch/arm/mach-mvebu/kirkwood-pm.h:23:20: note: previous definition of 'kirkwood_pm_init' was here The underlying issue is that kirkwood-pm.o is not actually meant to be used when CONFIG_PM is disabled, so we should also leave it out of the Makefile. The same seems to be true for the PM code in MACH_MVEBU_V7, and I'm treating it the same way here. 
Signed-off-by: Arnd Bergmann Fixes: d705c1a66e15 ("ARM: Kirkwood: fix kirkwood_pm_init() declaration/type") Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index ecf9e0c3b107..e53c6cfcab51 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -7,9 +7,15 @@ CFLAGS_pmsu.o := -march=armv7-a obj-$(CONFIG_MACH_MVEBU_ANY) += system-controller.o mvebu-soc-id.o ifeq ($(CONFIG_MACH_MVEBU_V7),y) -obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o pm.o pm-board.o +obj-y += cpu-reset.o board-v7.o coherency.o coherency_ll.o pmsu.o pmsu_ll.o + +obj-$(CONFIG_PM) += pm.o pm-board.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o platsmp-a9.o headsmp-a9.o endif obj-$(CONFIG_MACH_DOVE) += dove.o -obj-$(CONFIG_MACH_KIRKWOOD) += kirkwood.o kirkwood-pm.o + +ifeq ($(CONFIG_MACH_KIRKWOOD),y) +obj-y += kirkwood.o +obj-$(CONFIG_PM) += kirkwood-pm.o +endif -- cgit v1.2.3 From 65c0554b73c920023cc8998802e508b798113b46 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 30 Jun 2016 18:11:41 +0200 Subject: x86/power/64: Fix kernel text mapping corruption during image restoration Logan Gunthorpe reports that hibernation stopped working reliably for him after commit ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata). That turns out to be a consequence of a long-standing issue with the 64-bit image restoration code on x86, which is that the temporary page tables set up by it to avoid page table corruption when the last bits of the image kernel's memory contents are copied into their original page frames re-use the boot kernel's text mapping, but that mapping may very well get corrupted just like any other part of the page tables. Of course, if that happens, the final jump to the image kernel's entry point will go nowhere. The exact reason why commit ab76f7b4ab23 matters here is that it sometimes causes a PMD of a large page to be split into PTEs that are allocated dynamically and get corrupted during image restoration as described above. To fix that issue note that the code copying the last bits of the image kernel's memory contents to the page frames occupied by them previously doesn't use the kernel text mapping, because it runs from a special page covered by the identity mapping set up for that code from scratch. Hence, the kernel text mapping is only needed before that code starts to run and then it will only be used just for the final jump to the image kernel's entry point. Accordingly, the temporary page tables set up in swsusp_arch_resume() on x86-64 need to contain the kernel text mapping too. That mapping is only going to be used for the final jump to the image kernel, so it only needs to cover the image kernel's entry point, because the first thing the image kernel does after getting control back is to switch over to its own original page tables. Moreover, the virtual address of the image kernel's entry point in that mapping has to be the same as the one mapped by the image kernel's page tables.
With that in mind, modify the x86-64 arch_hibernation_header_save() and arch_hibernation_header_restore() routines to pass the physical address of the image kernel's entry point (in addition to its virtual address) to the boot kernel (a small piece of assembly code involved in passing the entry point's virtual address to the image kernel is not necessary any more after that, so drop it). Update RESTORE_MAGIC too to reflect the image header format change. Next, in set_up_temporary_mappings(), use the physical and virtual addresses of the image kernel's entry point passed in the image header to set up a minimum kernel text mapping (using memory pages that won't be overwritten by the image kernel's memory contents) that will map those addresses to each other as appropriate. This makes the concern about the possible corruption of the original boot kernel text mapping go away and if the minimum kernel text mapping used for the final jump marks the image kernel's entry point memory as executable, the jump to it is guaranteed to succeed. Fixes: ab76f7b4ab23 (x86/mm: Set NX on gap between __ex_table and rodata) Link: http://marc.info/?l=linux-pm&m=146372852823760&w=2 Reported-by: Logan Gunthorpe Reported-and-tested-by: Borislav Petkov Tested-by: Kees Cook Signed-off-by: Rafael J. Wysocki --- arch/x86/power/hibernate_64.c | 97 ++++++++++++++++++++++++++++++++++----- arch/x86/power/hibernate_asm_64.S | 55 ++++++++++------------ 2 files changed, 109 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 009947d419a6..f2b5e6a5cf95 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Defined in hibernate_asm_64.S */ extern asmlinkage __visible int restore_image(void); @@ -28,6 +29,7 @@ extern asmlinkage __visible int restore_image(void); * kernel's text (this value is passed in the image header). */ unsigned long restore_jump_address __visible; +unsigned long jump_address_phys; /* * Value of the cr3 register from before the hibernation (this value is passed @@ -37,7 +39,43 @@ unsigned long restore_cr3 __visible; pgd_t *temp_level4_pgt __visible; -void *relocated_restore_code __visible; +unsigned long relocated_restore_code __visible; + +static int set_up_temporary_text_mapping(void) +{ + pmd_t *pmd; + pud_t *pud; + + /* + * The new mapping only has to cover the page containing the image + * kernel's entry point (jump_address_phys), because the switch over to + * it is carried out by relocated code running from a page allocated + * specifically for this purpose and covered by the identity mapping, so + * the temporary kernel text mapping is only needed for the final jump. + * Moreover, in that mapping the virtual address of the image kernel's + * entry point must be the same as its virtual address in the image + * kernel (restore_jump_address), so the image kernel's + * restore_registers() code doesn't find itself in a different area of + * the virtual address space after switching over to the original page + * tables used by the image kernel.
+ */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + + set_pmd(pmd + pmd_index(restore_jump_address), + __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); + set_pud(pud + pud_index(restore_jump_address), + __pud(__pa(pmd) | _KERNPG_TABLE)); + set_pgd(temp_level4_pgt + pgd_index(restore_jump_address), + __pgd(__pa(pud) | _KERNPG_TABLE)); + + return 0; +} static void *alloc_pgt_page(void *context) { @@ -59,9 +97,10 @@ static int set_up_temporary_mappings(void) if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); + /* Prepare a temporary mapping for the kernel text */ + result = set_up_temporary_text_mapping(); + if (result) + return result; /* Set up the direct mapping from scratch */ for (i = 0; i < nr_pfn_mapped; i++) { @@ -78,19 +117,50 @@ static int set_up_temporary_mappings(void) return 0; } +static int relocate_restore_code(void) +{ + pgd_t *pgd; + pud_t *pud; + + relocated_restore_code = get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + + memcpy((void *)relocated_restore_code, &core_restore_code, PAGE_SIZE); + + /* Make the page containing the relocated code executable */ + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code); + pud = pud_offset(pgd, relocated_restore_code); + if (pud_large(*pud)) { + set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX)); + } else { + pmd_t *pmd = pmd_offset(pud, relocated_restore_code); + + if (pmd_large(*pmd)) { + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX)); + } else { + pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code); + + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX)); + } + } + __flush_tlb_all(); + + return 0; +} + int swsusp_arch_resume(void) { int error; /* We have got enough memory and from now on we cannot recover */ - if ((error = set_up_temporary_mappings())) + error = set_up_temporary_mappings(); + if (error) return error; - relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); - if (!relocated_restore_code) - return -ENOMEM; - memcpy(relocated_restore_code, &core_restore_code, - &restore_registers - &core_restore_code); + error = relocate_restore_code(); + if (error) + return error; restore_image(); return 0; @@ -109,11 +179,12 @@ int pfn_is_nosave(unsigned long pfn) struct restore_data_record { unsigned long jump_address; + unsigned long jump_address_phys; unsigned long cr3; unsigned long magic; }; -#define RESTORE_MAGIC 0x0123456789ABCDEFUL +#define RESTORE_MAGIC 0x123456789ABCDEF0UL /** * arch_hibernation_header_save - populate the architecture specific part @@ -126,7 +197,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) if (max_size < sizeof(struct restore_data_record)) return -EOVERFLOW; - rdr->jump_address = restore_jump_address; + rdr->jump_address = (unsigned long)&restore_registers; + rdr->jump_address_phys = __pa_symbol(&restore_registers); rdr->cr3 = restore_cr3; rdr->magic = RESTORE_MAGIC; return 0; @@ -142,6 +214,7 @@ int arch_hibernation_header_restore(void *addr) struct restore_data_record *rdr = addr; restore_jump_address = rdr->jump_address; + jump_address_phys = rdr->jump_address_phys; restore_cr3 = rdr->cr3; return (rdr->magic == RESTORE_MAGIC) ? 
0 : -EINVAL; } diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 4400a43b9e28..3177c2bc26f6 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -44,9 +44,6 @@ ENTRY(swsusp_arch_suspend) pushfq popq pt_regs_flags(%rax) - /* save the address of restore_registers */ - movq $restore_registers, %rax - movq %rax, restore_jump_address(%rip) /* save cr3 */ movq %cr3, %rax movq %rax, restore_cr3(%rip) @@ -57,31 +54,34 @@ ENTRY(swsusp_arch_suspend) ENDPROC(swsusp_arch_suspend) ENTRY(restore_image) - /* switch to temporary page tables */ - movq $__PAGE_OFFSET, %rdx - movq temp_level4_pgt(%rip), %rax - subq %rdx, %rax - movq %rax, %cr3 - /* Flush TLB */ - movq mmu_cr4_features(%rip), %rax - movq %rax, %rdx - andq $~(X86_CR4_PGE), %rdx - movq %rdx, %cr4; # turn off PGE - movq %cr3, %rcx; # flush TLB - movq %rcx, %cr3; - movq %rax, %cr4; # turn PGE back on - /* prepare to jump to the image kernel */ - movq restore_jump_address(%rip), %rax - movq restore_cr3(%rip), %rbx + movq restore_jump_address(%rip), %r8 + movq restore_cr3(%rip), %r9 + + /* prepare to switch to temporary page tables */ + movq temp_level4_pgt(%rip), %rax + movq mmu_cr4_features(%rip), %rbx /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + + /* jump to relocated restore code */ movq relocated_restore_code(%rip), %rcx jmpq *%rcx /* code below has been relocated to a safe page */ ENTRY(core_restore_code) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rcx + subq %rcx, %rax + movq %rax, %cr3 + /* flush TLB */ + movq %rbx, %rcx + andq $~(X86_CR4_PGE), %rcx + movq %rcx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rbx, %cr4; # turn PGE back on .Lloop: testq %rdx, %rdx jz .Ldone @@ -96,24 +96,17 @@ ENTRY(core_restore_code) /* progress to the next pbe */ movq pbe_next(%rdx), %rdx jmp .Lloop + .Ldone: /* jump to the restore_registers address from the image header */ - jmpq *%rax - /* - * NOTE: This assumes that the boot kernel's text mapping covers the - * image kernel's page containing restore_registers and the address of - * this page is the same as in the image kernel's text mapping (it - * should always be true, because the text mapping is linear, starting - * from 0, and is supposed to cover the entire kernel text for every - * kernel). - * - * code below belongs to the image kernel - */ + jmpq *%r8 + /* code below belongs to the image kernel */ + .align PAGE_SIZE ENTRY(restore_registers) FRAME_BEGIN /* go back to the original page tables */ - movq %rbx, %cr3 + movq %r9, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax -- cgit v1.2.3 From 1ead852dd88779eda12cb09cc894a03d9abfe1ec Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 16 Jun 2016 19:13:49 +0200 Subject: x86/amd_nb: Fix boot crash on non-AMD systems Fix boot crash that triggers if this driver is built into a kernel and run on non-AMD systems. AMD northbridges users call amd_cache_northbridges() and it returns a negative value to signal that we weren't able to cache/detect any northbridges on the system. At least, it should do so as all its callers expect it to do so. But it does return a negative value only when kmalloc() fails. Fix it to return -ENODEV if there are no NBs cached as otherwise, amd_nb users like amd64_edac, for example, which relies on it to know whether it should load or not, gets loaded on systems like Intel Xeons where it shouldn't. 
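To illustrate why the return value matters: amd_nb consumers typically bail out of their init path on a negative return, so returning 0 while zero northbridges were cached lets them continue loading on non-AMD hardware. A minimal sketch of such a caller, with a hypothetical function name (not taken from amd64_edac):

/*
 * Hypothetical consumer, for illustration only; amd_cache_northbridges()
 * is declared in <asm/amd_nb.h>.
 */
static int __init example_nb_user_init(void)
{
	int err = amd_cache_northbridges();

	if (err < 0)
		return err;	/* now -ENODEV on systems without AMD NBs */

	/* safe to use the cached northbridge array from here on */
	return 0;
}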
Reported-and-tested-by: Tony Battersby Signed-off-by: Borislav Petkov Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466097230-5333-2-git-send-email-bp@alien8.de Link: https://lkml.kernel.org/r/5761BEB0.9000807@cybernetics.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/amd_nb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index a147e676fc7b..e991d5c8bb3a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -71,8 +71,8 @@ int amd_cache_northbridges(void) while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL) i++; - if (i == 0) - return 0; + if (!i) + return -ENODEV; nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) -- cgit v1.2.3 From 0519e8b4cb2bda598f941088948129f9fe9e6acd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 29 Jun 2016 22:01:34 +0300 Subject: x86/platform/intel-mid: Add pinctrl for Intel Merrifield Intel Merrifield uses a special address space reserved for Family-Level Interface Shim (FLIS) that allows consumers to mux and configure pins. Create a platform device for it. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467226894-107109-1-git-send-email-andriy.shevchenko@linux.intel.com [ Fixed typo. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 + .../intel-mid/device_libs/platform_mrfld_pinctrl.c | 43 ++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index abe8ba87c970..79e97ed5be5b 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -1,3 +1,5 @@ +# Family-Level Interface Shim (FLIS) +obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o # IPC Devices obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c new file mode 100644 index 000000000000..4de8a664e6a1 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c @@ -0,0 +1,43 @@ +/* + * Intel Merrifield FLIS platform device initialization file + * + * Copyright (C) 2016, Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#include +#include +#include + +#include + +#define FLIS_BASE_ADDR 0xff0c0000 +#define FLIS_LENGTH 0x8000 + +static struct resource mrfld_pinctrl_mmio_resource = { + .start = FLIS_BASE_ADDR, + .end = FLIS_BASE_ADDR + FLIS_LENGTH - 1, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device mrfld_pinctrl_device = { + .name = "pinctrl-merrifield", + .id = PLATFORM_DEVID_NONE, + .resource = &mrfld_pinctrl_mmio_resource, + .num_resources = 1, +}; + +static int __init mrfld_pinctrl_init(void) +{ + if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) + return platform_device_register(&mrfld_pinctrl_device); + + return -ENODEV; +} +arch_initcall(mrfld_pinctrl_init); -- cgit v1.2.3 From 487cf917ed0d12afaf403d9d77684bf44b8c13be Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Wed, 29 Jun 2016 04:27:36 -0400 Subject: Revert "ACPI, PCI, IRQ: remove redundant code in acpi_irq_penalty_init()" Trying to make the ISA and PCI init functionality common turned out to be a bad idea, because the ISA path depends on external functionality. Restore the previous behavior and limit the refactoring to PCI interrupts only. Fixes: 1fcb6a813c4f "ACPI,PCI,IRQ: remove redundant code in acpi_irq_penalty_init()" Signed-off-by: Sinan Kaya Tested-by: Wim Osterholt Signed-off-by: Rafael J. Wysocki --- arch/x86/pci/acpi.c | 1 + drivers/acpi/pci_link.c | 36 ++++++++++++++++++++++++++++++++++++ include/acpi/acpi_drivers.h | 1 + 3 files changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index b2a4e2a61f6b..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -396,6 +396,7 @@ int __init pci_acpi_init(void) return -ENODEV; printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; x86_init.pci.init_irq = x86_init_noop; diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index db7be62a8222..606083bb3f00 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -517,6 +517,42 @@ static int acpi_irq_get_penalty(int irq) return penalty; } +int __init acpi_irq_penalty_init(void) +{ + struct acpi_pci_link *link; + int i; + + /* + * Update penalties to facilitate IRQ balancing. + */ + list_for_each_entry(link, &acpi_link_list, list) { + + /* + * reflect the possible and active irqs in the penalty table -- + * useful for breaking ties. + */ + if (link->irq.possible_count) { + int penalty = + PIRQ_PENALTY_PCI_POSSIBLE / + link->irq.possible_count; + + for (i = 0; i < link->irq.possible_count; i++) { + if (link->irq.possible[i] < ACPI_MAX_ISA_IRQS) + acpi_isa_irq_penalty[link->irq. 
+ possible[i]] += + penalty; + } + + } else if (link->irq.active && + (link->irq.active < ACPI_MAX_ISA_IRQS)) { + acpi_isa_irq_penalty[link->irq.active] += + PIRQ_PENALTY_PCI_POSSIBLE; + } + } + + return 0; +} + static int acpi_irq_balance = -1; /* 0: static, 1: balance */ static int acpi_pci_link_allocate(struct acpi_pci_link *link) diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index 797ae2ec8eee..29c691265b49 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -78,6 +78,7 @@ /* ACPI PCI Interrupt Link (pci_link.c) */ +int acpi_irq_penalty_init(void); int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering, int *polarity, char **name); int acpi_pci_link_free_irq(acpi_handle handle); -- cgit v1.2.3 From 9010ae4a8dee29e5886e86682799dde0eee7f447 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 1 Jul 2016 15:22:22 -0700 Subject: perf/x86/intel: Update event constraints when HT is off This patch updates the event constraints for non-PEBS mode for Intel Broadwell and Skylake processors. When HT is off, each CPU gets 8 generic counters. However, not all events can be programmed on any of the 8 counters. This patch adds the constraints for the MEM_* events which can only be measured on the bottom 4 counters. The constraints are also valid when HT is off because, then, there are only 4 generic counters and they are the bottom counters. Signed-off-by: Stephane Eranian Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1467411742-13245-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..9b4f9d3ce465 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -115,6 +115,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -139,6 +143,10 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly = INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -182,6 +190,16 @@ struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ + + /* + * when HT is 
off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc6, 0xf), /* FRONTEND_RETIRED.* */ + EVENT_CONSTRAINT_END }; @@ -250,6 +268,10 @@ static struct event_constraint intel_hsw_event_constraints[] = { /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), + /* + * When HT is off these events can only run on the bottom 4 counters + * When HT is on, they are impacted by the HT bug and require EXCL access + */ INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -264,6 +286,13 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ + /* + * when HT is off, these can only run on the bottom 4 counters + */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_L3_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xcd, 0xf), /* MEM_TRANS_RETIRED.* */ EVENT_CONSTRAINT_END }; -- cgit v1.2.3 From fc18822510721fe694d273c5211c71ea52796d76 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 1 Jul 2016 23:02:05 -0500 Subject: perf/x86: Fix 32-bit perf user callgraph collection A basic perf callgraph record operation causes an immediate panic on a 32-bit kernel compiled with CONFIG_CC_STACKPROTECTOR=y: $ perf record -g ls Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c0404fbd CPU: 0 PID: 998 Comm: ls Not tainted 4.7.0-rc5+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.1-1.fc24 04/01/2014 c0dd5967 ff7afe1c 00000086 f41dbc2c c07445a0 464c457f f41dbca8 f41dbc44 c05646f4 f41dbca8 464c457f f41dbca8 464c457f f41dbc54 c04625be c0ce56fc c0404fbd f41dbc88 c0404fbd b74668f0 f41dc000 00000000 c0000000 00000000 Call Trace: [] dump_stack+0x58/0x78 [] panic+0x8e/0x1c6 [] __stack_chk_fail+0x1e/0x30 [] ? perf_callchain_user+0x22d/0x230 [] perf_callchain_user+0x22d/0x230 [] get_perf_callchain+0x1ff/0x270 [] perf_callchain+0x78/0x90 [] perf_prepare_sample+0x24b/0x370 [] perf_event_output_forward+0x24/0x70 [] __perf_event_overflow+0xa0/0x210 [] ? cpu_clock_event_read+0x43/0x50 [] perf_swevent_hrtimer+0x101/0x180 [] ? kmap_atomic_prot+0x35/0x140 [] ? get_page_from_freelist+0x279/0x950 [] ? vma_interval_tree_remove+0x158/0x230 [] ? wp_page_copy.isra.82+0x2f4/0x630 [] ? page_add_file_rmap+0x1d/0x50 [] ? unlock_page+0x61/0x80 [] ? filemap_map_pages+0x305/0x320 [] ? handle_mm_fault+0xb7f/0x1560 [] ? timerqueue_del+0x1b/0x70 [] ? __remove_hrtimer+0x2e/0x60 [] __hrtimer_run_queues+0xcb/0x2a0 [] ? 
__perf_event_overflow+0x210/0x210 [] hrtimer_interrupt+0x8a/0x180 [] local_apic_timer_interrupt+0x32/0x60 [] smp_apic_timer_interrupt+0x33/0x50 [] apic_timer_interrupt+0x34/0x3c Kernel Offset: disabled ---[ end Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: c0404fbd The panic is caused by the fact that perf_callchain_user() mistakenly assumes it's 64-bit only and ends up corrupting the stack. Signed-off-by: Josh Poimboeuf Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.5+ Fixes: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") Link: http://lkml.kernel.org/r/1a547f5077ec30f75f9b57074837c3c80df86e5e.1467432113.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..26ced536005a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2319,7 +2319,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct stack_frame frame; - const void __user *fp; + const unsigned long __user *fp; if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ @@ -2332,7 +2332,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM)) return; - fp = (void __user *)regs->bp; + fp = (unsigned long __user *)regs->bp; perf_callchain_store(entry, regs->ip); @@ -2345,16 +2345,17 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs pagefault_disable(); while (entry->nr < entry->max_stack) { unsigned long bytes; + frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 16)) + if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) break; - bytes = __copy_from_user_nmi(&frame.next_frame, fp, 8); + bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); if (bytes != 0) break; - bytes = __copy_from_user_nmi(&frame.return_address, fp+8, 8); + bytes = __copy_from_user_nmi(&frame.return_address, fp + 1, sizeof(*fp)); if (bytes != 0) break; -- cgit v1.2.3 From 86a8280a7fe007d61b05fa8a352edc0595283dad Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Sat, 21 May 2016 13:57:20 +0200 Subject: m68k: Assorted spelling fixes - s/acccess/access/ - s/accoding/according/ - s/addad/added/ - s/addreess/address/ - s/allocatiom/allocation/ - s/Assember/Assembler/ - s/compactnes/compactness/ - s/conneced/connected/ - s/decending/descending/ - s/diectly/directly/ - s/diplacement/displacement/ Signed-off-by: Andrea Gelmini [geert: Squashed, fix arch/m68k/ifpsp060/src/pfpsp.S] Signed-off-by: Geert Uytterhoeven --- arch/m68k/coldfire/head.S | 2 +- arch/m68k/coldfire/m5272.c | 2 +- arch/m68k/coldfire/pci.c | 2 +- arch/m68k/ifpsp060/src/fpsp.S | 8 ++++---- arch/m68k/ifpsp060/src/pfpsp.S | 4 ++-- arch/m68k/include/asm/dma.h | 2 +- arch/m68k/include/asm/m525xsim.h | 4 ++-- arch/m68k/include/asm/mcfmmu.h | 2 +- arch/m68k/include/asm/q40_master.h | 2 +- arch/m68k/mac/iop.c | 2 +- arch/m68k/math-emu/fp_decode.h | 2 +- 11 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/m68k/coldfire/head.S b/arch/m68k/coldfire/head.S index fa31be297b85..73d92ea0ce65 100644 --- 
a/arch/m68k/coldfire/head.S +++ b/arch/m68k/coldfire/head.S @@ -288,7 +288,7 @@ _clear_bss: #endif /* - * Assember start up done, start code proper. + * Assembler start up done, start code proper. */ jsr start_kernel /* start Linux kernel */ diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c index c525e4c08f84..9abb1a441da0 100644 --- a/arch/m68k/coldfire/m5272.c +++ b/arch/m68k/coldfire/m5272.c @@ -111,7 +111,7 @@ void __init config_BSP(char *commandp, int size) /***************************************************************************/ /* - * Some 5272 based boards have the FEC ethernet diectly connected to + * Some 5272 based boards have the FEC ethernet directly connected to * an ethernet switch. In this case we need to use the fixed phy type, * and we need to declare it early in boot. */ diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 821de928dc3f..6a640be48568 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -42,7 +42,7 @@ static unsigned long iospace; /* * We need to be carefull probing on bus 0 (directly connected to host - * bridge). We should only acccess the well defined possible devices in + * bridge). We should only access the well defined possible devices in * use, ignore aliases and the like. */ static unsigned char mcf_host_slot2sid[32] = { diff --git a/arch/m68k/ifpsp060/src/fpsp.S b/arch/m68k/ifpsp060/src/fpsp.S index 78cb60f5bb4d..9bbffebe3eb5 100644 --- a/arch/m68k/ifpsp060/src/fpsp.S +++ b/arch/m68k/ifpsp060/src/fpsp.S @@ -10191,7 +10191,7 @@ xdnrm_con: xdnrm_sd: mov.l %a1,-(%sp) tst.b LOCAL_EX(%a0) # is denorm pos or neg? - smi.b %d1 # set d0 accodingly + smi.b %d1 # set d0 accordingly bsr.l unf_sub mov.l (%sp)+,%a1 xdnrm_exit: @@ -10990,7 +10990,7 @@ src_qnan_m: # routines where an instruction is selected by an index into # a large jump table corresponding to a given instruction which # has been decoded. Flow continues here where we now decode -# further accoding to the source operand type. +# further according to the source operand type. # global fsinh @@ -23196,14 +23196,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/ifpsp060/src/pfpsp.S b/arch/m68k/ifpsp060/src/pfpsp.S index 4aedef973cf6..3535e6c87eec 100644 --- a/arch/m68k/ifpsp060/src/pfpsp.S +++ b/arch/m68k/ifpsp060/src/pfpsp.S @@ -13156,14 +13156,14 @@ m_sign: # # 1. Branch on the sign of the adjusted exponent. # 2p.(positive exp) -# 2. Check M16 and the digits in lwords 2 and 3 in decending order. +# 2. Check M16 and the digits in lwords 2 and 3 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Subtract the count from the exp. # 5. Check if the exp has crossed zero in #3 above; make the exp abs # and set SE. # 6. Multiply the mantissa by 10**count. # 2n.(negative exp) -# 2. 
Check the digits in lwords 3 and 2 in decending order. +# 2. Check the digits in lwords 3 and 2 in descending order. # 3. Add one for each zero encountered until a non-zero digit. # 4. Add the count to the exp. # 5. Check if the exp has crossed zero in #3 above; clear SE. diff --git a/arch/m68k/include/asm/dma.h b/arch/m68k/include/asm/dma.h index 429fe26e320c..208b4daa14b3 100644 --- a/arch/m68k/include/asm/dma.h +++ b/arch/m68k/include/asm/dma.h @@ -18,7 +18,7 @@ * AUG/22/2000 : added support for 32-bit Dual-Address-Mode (K) 2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * - * AUG/25/2000 : addad support for 8, 16 and 32-bit Single-Address-Mode (K)2000 + * AUG/25/2000 : added support for 8, 16 and 32-bit Single-Address-Mode (K)2000 * Oliver Kamphenkel (O.Kamphenkel@tu-bs.de) * * APR/18/2002 : added proper support for MCF5272 DMA controller. diff --git a/arch/m68k/include/asm/m525xsim.h b/arch/m68k/include/asm/m525xsim.h index f186459072e9..699f20c8a0fe 100644 --- a/arch/m68k/include/asm/m525xsim.h +++ b/arch/m68k/include/asm/m525xsim.h @@ -123,10 +123,10 @@ /* * I2C module. */ -#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base addreess I2C0 */ +#define MCFI2C_BASE0 (MCF_MBAR + 0x280) /* Base address I2C0 */ #define MCFI2C_SIZE0 0x20 /* Register set size */ -#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base addreess I2C1 */ +#define MCFI2C_BASE1 (MCF_MBAR2 + 0x440) /* Base address I2C1 */ #define MCFI2C_SIZE1 0x20 /* Register set size */ /* diff --git a/arch/m68k/include/asm/mcfmmu.h b/arch/m68k/include/asm/mcfmmu.h index 26cc3d5a63f8..8824236e303f 100644 --- a/arch/m68k/include/asm/mcfmmu.h +++ b/arch/m68k/include/asm/mcfmmu.h @@ -38,7 +38,7 @@ /* * MMU Operation register. */ -#define MMUOR_UAA 0x00000001 /* Update allocatiom address */ +#define MMUOR_UAA 0x00000001 /* Update allocation address */ #define MMUOR_ACC 0x00000002 /* TLB access */ #define MMUOR_RD 0x00000004 /* TLB access read */ #define MMUOR_WR 0x00000000 /* TLB access write */ diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h index fc5b36278d04..c48d21b68f04 100644 --- a/arch/m68k/include/asm/q40_master.h +++ b/arch/m68k/include/asm/q40_master.h @@ -1,6 +1,6 @@ /* * Q40 master Chip Control - * RTC stuff merged for compactnes.. + * RTC stuff merged for compactness. */ #ifndef _Q40_MASTER_H diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 4d2adfb32a2a..7990b6f50105 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -60,7 +60,7 @@ * * The host talks to the IOPs using a rather simple message-passing scheme via * a shared memory area in the IOP RAM. Each IOP has seven "channels"; each - * channel is conneced to a specific software driver on the IOP. For example + * channel is connected to a specific software driver on the IOP. For example * on the SCC IOP there is one channel for each serial port. Each channel has * an incoming and and outgoing message queue with a depth of one. 
* diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h index 759679d9ab96..6d1e760e2a0e 100644 --- a/arch/m68k/math-emu/fp_decode.h +++ b/arch/m68k/math-emu/fp_decode.h @@ -130,7 +130,7 @@ do_fscc=0 bfextu %d2{#13,#3},%d0 .endm -| decode the 8bit diplacement from the brief extension word +| decode the 8bit displacement from the brief extension word .macro fp_decode_disp8 move.b %d2,%d0 ext.w %d0 -- cgit v1.2.3 From 06ee6d571f0e350253a8fc3492316b2be007fae2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 17:39:24 +0900 Subject: genirq: Add affinity hint to irq allocation Add an extra argument to the irq(domain) allocation functions, so we can hand down affinity hints to the allocator. Thats necessary to implement proper support for multiqueue devices. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: linux-nvme@lists.infradead.org Cc: axboe@fb.com Cc: agordeev@redhat.com Link: http://lkml.kernel.org/r/1467621574-8277-4-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- arch/sparc/kernel/irq_64.c | 2 +- arch/x86/kernel/apic/io_apic.c | 5 +++-- include/linux/irq.h | 4 ++-- include/linux/irqdomain.h | 9 ++++++--- kernel/irq/ipi.c | 2 +- kernel/irq/irqdesc.c | 12 ++++++++---- kernel/irq/irqdomain.c | 22 ++++++++++++++-------- kernel/irq/manage.c | 7 ++++--- kernel/irq/msi.c | 3 ++- 9 files changed, 41 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e22416ce56ea..34a7930b76ef 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -242,7 +242,7 @@ unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) { int irq; - irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL, NULL); if (irq <= 0) goto out; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 446702ed99dc..7c4f90dd4c2a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -981,7 +981,7 @@ static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi, return __irq_domain_alloc_irqs(domain, irq, 1, ioapic_alloc_attr_node(info), - info, legacy); + info, legacy, NULL); } /* @@ -1014,7 +1014,8 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain, info->ioapic_pin)) return -ENOMEM; } else { - irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true); + irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true, + NULL); if (irq >= 0) { irq_data = irq_domain_get_irq_data(domain, irq); data = irq_data->chip_data; diff --git a/include/linux/irq.h b/include/linux/irq.h index f6074813688d..39ce46ac5c18 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -708,11 +708,11 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) unsigned int arch_dynirq_lower_bound(unsigned int from); int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner); + struct module *owner, const struct cpumask *affinity); /* use macros to avoid needing export.h for THIS_MODULE */ #define irq_alloc_descs(irq, from, cnt, node) \ - __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE) + __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE, NULL) #define irq_alloc_desc(node) \ irq_alloc_descs(-1, 0, 1, node) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f1f36e04d885..1aee0fbe900e 100644 --- 
a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -39,6 +39,7 @@ struct irq_domain; struct of_device_id; struct irq_chip; struct irq_data; +struct cpumask; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 @@ -217,7 +218,8 @@ extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node); + irq_hw_number_t hwirq, int node, + const struct cpumask *affinity); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { @@ -389,7 +391,7 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc); + bool realloc, const struct cpumask *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); extern void irq_domain_activate_irq(struct irq_data *irq_data); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); @@ -397,7 +399,8 @@ extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false); + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, + NULL); } extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 89b49f6773f0..4fd23510d5f2 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -76,7 +76,7 @@ int irq_reserve_ipi(struct irq_domain *domain, } } - virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE); + virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE, NULL); if (virq <= 0) { pr_warn("Can't reserve IPI, failed to alloc descs\n"); return -ENOMEM; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 8731e1c5d1e7..b8df4fcdbb5f 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -223,7 +223,7 @@ static void free_desc(unsigned int irq) } static int alloc_descs(unsigned int start, unsigned int cnt, int node, - struct module *owner) + const struct cpumask *affinity, struct module *owner) { struct irq_desc *desc; int i; @@ -333,6 +333,7 @@ static void free_desc(unsigned int irq) } static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + const struct cpumask *affinity, struct module *owner) { u32 i; @@ -453,12 +454,15 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @cnt: Number of consecutive irqs to allocate. 
* @node: Preferred node on which the irq descriptor should be allocated * @owner: Owning module (can be NULL) + * @affinity: Optional pointer to an affinity mask which hints where the + * irq descriptors should be allocated and which default + * affinities to use * * Returns the first irq number or error code */ int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner) + struct module *owner, const struct cpumask *affinity) { int start, ret; @@ -494,7 +498,7 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node, owner); + return alloc_descs(start, cnt, node, affinity, owner); err: mutex_unlock(&sparse_irq_lock); @@ -512,7 +516,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs); */ unsigned int irq_alloc_hwirqs(int cnt, int node) { - int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL); + int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL, NULL); if (irq < 0) return 0; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8798b6c9e945..79459b732dc9 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -481,7 +481,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, } /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node)); + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), NULL); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; @@ -835,19 +835,23 @@ const struct irq_domain_ops irq_domain_simple_ops = { EXPORT_SYMBOL_GPL(irq_domain_simple_ops); int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, - int node) + int node, const struct cpumask *affinity) { unsigned int hint; if (virq >= 0) { - virq = irq_alloc_descs(virq, virq, cnt, node); + virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, + affinity); } else { hint = hwirq % nr_irqs; if (hint == 0) hint++; - virq = irq_alloc_descs_from(hint, cnt, node); - if (virq <= 0 && hint > 1) - virq = irq_alloc_descs_from(1, cnt, node); + virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, + affinity); + if (virq <= 0 && hint > 1) { + virq = __irq_alloc_descs(-1, 1, cnt, node, THIS_MODULE, + affinity); + } } return virq; @@ -1160,6 +1164,7 @@ int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, * @node: NUMA node id for memory allocation * @arg: domain specific argument * @realloc: IRQ descriptors have already been allocated if true + * @affinity: Optional irq affinity mask for multiqueue devices * * Allocate IRQ numbers and initialized all data structures to support * hierarchy IRQ domains. 
@@ -1175,7 +1180,7 @@ int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, */ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, - bool realloc) + bool realloc, const struct cpumask *affinity) { int i, ret, virq; @@ -1193,7 +1198,8 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, if (realloc && irq_base >= 0) { virq = irq_base; } else { - virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node); + virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, + affinity); if (virq < 0) { pr_debug("cannot allocate IRQ(base %d, count %d)\n", irq_base, nr_irqs); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 30658e9827f0..ad0aac6d1248 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -353,10 +353,11 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) return 0; /* - * Preserve an userspace affinity setup, but make sure that - * one of the targets is online. + * Preserve the managed affinity setting and an userspace affinity + * setup, but make sure that one of the targets is online. */ - if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { + if (irqd_affinity_is_managed(&desc->irq_data) || + irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { if (cpumask_intersects(desc->irq_common_data.affinity, cpu_online_mask)) set = desc->irq_common_data.affinity; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index eb5bf2b50b07..58dbbacc6fbb 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -334,7 +334,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, - dev_to_node(dev), &arg, false); + dev_to_node(dev), &arg, false, + NULL); if (virq < 0) { ret = -ENOSPC; if (ops->handle_error) -- cgit v1.2.3 From 175a20c16fdb7700fcac63f1eeb2caa7e1dddd2d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Jun 2016 18:06:49 +0300 Subject: x86/perf/intel/rapl: Fix module name collision with powercap intel-rapl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 4b6e2571bf00 the rapl perf module calls itself intel-rapl. That name was already in use by the rapl powercap driver, which now fails to load if the perf module is loaded. Fix the problem by renaming the perf module to intel-rapl-perf, so that both modules can coexist. 
Fixes: 4b6e2571bf00 ("x86/perf/intel/rapl: Make the Intel RAPL PMU driver modular") Signed-off-by: Ville Syrjälä Cc: Vince Weaver Cc: Alexander Shishkin Cc: Kan Liang Cc: Stephane Eranian Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1466694409-3620-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3660b2cf245a..06c2baa51814 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -1,8 +1,8 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o -intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o +intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o -- cgit v1.2.3 From 88d02a2ba6c52350f9a73ff1b01a5be839c3ca17 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 16 Jun 2016 15:50:31 -0700 Subject: MIPS: Fix page table corruption on THP permission changes. When the core THP code is modifying the permissions of a huge page it calls pmd_modify(), which unfortunately was clearing the _PAGE_HUGE bit of the page table entry. The result can be kernel messages like: mm/memory.c:397: bad pmd 000000040080004d. mm/memory.c:397: bad pmd 00000003ff00004d. mm/memory.c:397: bad pmd 000000040100004d. or: ------------[ cut here ]------------ WARNING: at mm/mmap.c:3200 exit_mmap+0x150/0x158() Modules linked in: ipv6 at24 octeon3_ethernet octeon_srio_nexus m25p80 CPU: 12 PID: 1295 Comm: pmderr Not tainted 3.10.87-rt80-Cavium-Octeon #4 Stack : 0000000040808000 0000000014009ce1 0000000000400004 ffffffff81076ba0 0000000000000000 0000000000000000 ffffffff85110000 0000000000000119 0000000000000004 0000000000000000 0000000000000119 43617669756d2d4f 0000000000000000 ffffffff850fda40 ffffffff85110000 0000000000000000 0000000000000000 0000000000000009 ffffffff809207a0 0000000000000c80 ffffffff80f1bf20 0000000000000001 000000ffeca36828 0000000000000001 0000000000000000 0000000000000001 000000ffeca7e700 ffffffff80886924 80000003fd7a0000 80000003fd7a39b0 80000003fdea8000 ffffffff80885780 80000003fdea8000 ffffffff80f12218 000000000000000c 000000000000050f 0000000000000000 ffffffff80865c4c 0000000000000000 0000000000000000 ... Call Trace: [] show_stack+0x6c/0xf8 [] warn_slowpath_common+0x78/0xa8 [] exit_mmap+0x150/0x158 [] mmput+0x5c/0x110 [] do_exit+0x230/0xa68 [] do_group_exit+0x54/0x1d0 [] __wake_up_parent+0x0/0x18 ---[ end trace c7b38293191c57dc ]--- BUG: Bad rss-counter state mm:80000003fa168000 idx:1 val:1536 Fix by not clearing _PAGE_HUGE bit. 
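The underlying pattern is generic: when an entry is rebuilt from a new protection value, attribute bits outside the "change mask" have to be carried over explicitly or they are silently dropped. A standalone illustration with invented bit values (not the MIPS definitions):

#include <assert.h>
#include <stdint.h>

/* Illustration only: the bit layout is made up, not the MIPS one. */
#define CHG_MASK 0x00ffu	/* bits preserved from the old entry */
#define HUGE_BIT 0x0100u	/* attribute that must survive a modify */

static uint32_t modify_entry(uint32_t pmd, uint32_t newprot)
{
	/* keep CHG_MASK bits plus the huge flag, take the rest from newprot */
	return (pmd & (CHG_MASK | HUGE_BIT)) | (newprot & ~CHG_MASK);
}

int main(void)
{
	uint32_t huge_pmd = 0x0042u | HUGE_BIT;

	/* a permission change must not clear HUGE_BIT */
	assert(modify_entry(huge_pmd, 0xff00u) & HUGE_BIT);
	return 0;
}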
Signed-off-by: David Daney Tested-by: Aaro Koskinen Cc: stable@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13687/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index f53816744d60..7d44e888134f 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -633,7 +633,7 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | + pmd_val(pmd) = (pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HUGE)) | (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } -- cgit v1.2.3 From 5eb495349f5ec3b134f7341a2450392fc86d99d0 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Jun 2016 17:32:08 +0200 Subject: ARM: tegra: beaver: Allow SD card voltage to be changed This allows to switch the card signal voltage level to 1.8 V, which is needed for any ultra high speed modes to work. Signed-off-by: Lucas Stach Acked-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Olof Johansson --- arch/arm/boot/dts/tegra30-beaver.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/tegra30-beaver.dts b/arch/arm/boot/dts/tegra30-beaver.dts index 1eca3b28ac64..b6da15d823a6 100644 --- a/arch/arm/boot/dts/tegra30-beaver.dts +++ b/arch/arm/boot/dts/tegra30-beaver.dts @@ -1843,7 +1843,7 @@ ldo5_reg: ldo5 { regulator-name = "vddio_sdmmc,avdd_vdac"; - regulator-min-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -1914,6 +1914,7 @@ sdhci@78000000 { status = "okay"; + vqmmc-supply = <&ldo5_reg>; cd-gpios = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>; wp-gpios = <&gpio TEGRA_GPIO(T, 3) GPIO_ACTIVE_HIGH>; power-gpios = <&gpio TEGRA_GPIO(D, 7) GPIO_ACTIVE_HIGH>; -- cgit v1.2.3 From aefbc4d04c7b09cb6775a32cea7986c62e489ee2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 30 Jun 2016 11:49:08 +0200 Subject: perf/x86/intel: Fix rdlbr_to() MSR reading typo It helps to actually read the right MSR.. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Fixes: d4cf1949f968 ("perf/x86/intel: Add {rd,wr}lbr_{to,from} wrappers") Signed-off-by: Ingo Molnar --- arch/x86/events/intel/lbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index cc4555a9e876..707d358e0dff 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -323,7 +323,7 @@ static inline u64 rdlbr_to(unsigned int idx) { u64 val; - rdmsrl(x86_pmu.lbr_from + idx, val); + rdmsrl(x86_pmu.lbr_to + idx, val); return val; } -- cgit v1.2.3 From 46866b59dfbe9bf99bb1323ce1f3fd2073a81aa3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Jun 2016 07:01:51 -0700 Subject: perf/x86/intel/uncore: Add support for the Intel Skylake client uncore PMU This patch adds full support for Intel SKL client uncore PMU: - Add support for SKL client CPU uncore PMU, which is similar to the BDW client PMU driver. (There are some differences in CBOX numbering and uncore control MSR.) 
- Add new support for SkyLake Mobile uncore PMUs, for both CPU and PCI uncore functionality. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1467208912-8179-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 2 ++ arch/x86/events/intel/uncore.h | 1 + arch/x86/events/intel/uncore_snb.c | 67 +++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index dc965d2cf076..59b4974c697f 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1379,6 +1379,7 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = { }; static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .cpu_init = skl_uncore_cpu_init, .pci_init = skl_uncore_pci_init, }; @@ -1403,6 +1404,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), {}, }; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 66c3a3657a10..d6063e438158 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -364,6 +364,7 @@ int bdw_uncore_pci_init(void); int skl_uncore_pci_init(void); void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); +void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); /* perf_event_intel_uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 96531d2b843f..97a69dbba649 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,4 +1,4 @@ -/* Nehalem/SandBridge/Haswell uncore support */ +/* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" /* Uncore IMC PCI IDs */ @@ -9,6 +9,7 @@ #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 #define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -64,6 +65,10 @@ #define NHM_UNC_PERFEVTSEL0 0x3c0 #define NHM_UNC_UNCORE_PMC0 0x3b0 +/* SKL uncore global control */ +#define SKL_UNC_PERF_GLOBAL_CTL 0xe01 +#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1) + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -179,6 +184,60 @@ void snb_uncore_cpu_init(void) snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; } +static void skl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) { + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); + } +} + +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); +} + +static struct intel_uncore_ops skl_uncore_msr_ops = { + .init_box = skl_uncore_msr_init_box, + .exit_box = skl_uncore_msr_exit_box, + .disable_event = 
snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +static struct intel_uncore_type skl_uncore_cbox = { + .name = "cbox", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 44, + .fixed_ctr_bits = 48, + .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, + .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, + .fixed_ctr = SNB_UNC_FIXED_CTR, + .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL, + .single_fixed = 1, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .msr_offset = SNB_UNC_CBO_MSR_OFFSET, + .ops = &skl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, + .event_descs = snb_uncore_events, +}; + +static struct intel_uncore_type *skl_msr_uncores[] = { + &skl_uncore_cbox, + &snb_uncore_arb, + NULL, +}; + +void skl_uncore_cpu_init(void) +{ + uncore_msr_uncores = skl_msr_uncores; + if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) + skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + snb_uncore_arb.ops = &skl_uncore_msr_ops; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -544,6 +603,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* end: all zeroes */ }, }; @@ -587,6 +651,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ { /* end marker */ } }; -- cgit v1.2.3 From 03e3c2b7edbe1e8758196b2c7843333eb328063d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jun 2016 18:43:54 +0100 Subject: locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() smp_cond_load_acquire() is used to spin on a variable until some expression involving that variable becomes true. On arm64, we can build this using the LDXR and WFE instructions, since clearing of the exclusive monitor as a result of the variable being changed by another CPU generates an event, which will wake us up out of WFE. This patch implements smp_cond_load_acquire() using LDXR and WFE, which themselves are contained in an internal __cmpwait() function. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: catalin.marinas@arm.com Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1467049434-30451-1-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/arm64/include/asm/barrier.h | 13 ++++++++++ arch/arm64/include/asm/cmpxchg.h | 51 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49618db..4eea7f618dce 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b404454..bd86a79491bc 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ -- cgit v1.2.3 From 920a4a70c55058a9997f2e35bf41503acf87c301 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:16 +0000 Subject: timers, x86/apic/uv: Initialize the UV heartbeat timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.133837204@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..7a50519e6afc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -919,7 +919,7 @@ static void uv_heartbeat(unsigned long ignored) uv_set_scir_bits(bits); /* enable next timer period */ - mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); + mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } static void uv_heartbeat_enable(int cpu) @@ -928,7 +928,7 @@ static void uv_heartbeat_enable(int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_timer(timer, uv_heartbeat, cpu); + setup_pinned_timer(timer, uv_heartbeat, cpu); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; -- cgit v1.2.3 From f9c287ba3861714a1959accf14e815c44291bec4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:17 +0000 Subject: timers, x86/mce: Initialize MCE restart timer as pinned Pinned timers must carry the pinned attribute in the timer structure itself, so convert the code to the new API. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.215783439@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..b80a6361a9e1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1309,7 +1309,7 @@ static void __restart_timer(struct timer_list *t, unsigned long interval) if (timer_pending(t)) { if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + mod_timer(t, when); } else { t->expires = round_jiffies(when); add_timer_on(t, smp_processor_id()); @@ -1735,7 +1735,7 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + setup_pinned_timer(t, mce_timer_fn, cpu); mce_start_timer(cpu, t); } -- cgit v1.2.3 From 54b880caf15034644b564e378abf67b7f9eaf4dc Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 24 Jun 2016 23:42:23 +0100 Subject: kbuild, x86: Track generated headers with generated-y Track generated header files which aren't already in genhdr-y, alongside generic-y wrappers in the */include/generated/[uapi/]asm/ directories. Currently only x86 generates extra headers in these directories, for the purposes of enumerating system calls for different ABIs, and xen hypercalls. This will allow the asm-generic wrapper handling code to remove stale wrappers when files are removed from generic-y, without also removing these headers which are generated separately. 
Reported-by: kbuild test robot Signed-off-by: James Hogan Acked-by: Arnd Bergmann Cc: Jonathan Corbet Cc: linux-kbuild@vger.kernel.org Cc: linux-doc@vger.kernel.org Cc: Michal Marek Link: http://lkml.kernel.org/r/1466808144-23209-2-git-send-email-james.hogan@imgtec.com Signed-off-by: Thomas Gleixner --- Documentation/kbuild/makefiles.txt | 14 ++++++++++++++ arch/x86/include/asm/Kbuild | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'arch') diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 13f888a02a3d..385a5ef41c17 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -47,6 +47,7 @@ This document describes the Linux kernel Makefiles. --- 7.2 genhdr-y --- 7.3 destination-y --- 7.4 generic-y + --- 7.5 generated-y === 8 Kbuild Variables === 9 Makefile language @@ -1319,6 +1320,19 @@ See subsequent chapter for the syntax of the Kbuild file. Example: termios.h #include + --- 7.5 generated-y + + If an architecture generates other header files alongside generic-y + wrappers, and not included in genhdr-y, then generated-y specifies + them. + + This prevents them being treated as stale asm-generic wrappers and + removed. + + Example: + #arch/x86/include/asm/Kbuild + generated-y += syscalls_32.h + === 8 Kbuild Variables The top Makefile exports the following variables: diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index aeac434c9feb..2cfed174e3c9 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,5 +1,11 @@ +generated-y += syscalls_32.h +generated-y += syscalls_64.h +generated-y += unistd_32_ia32.h +generated-y += unistd_64_x32.h +generated-y += xen-hypercalls.h + genhdr-y += unistd_32.h genhdr-y += unistd_64.h genhdr-y += unistd_x32.h -- cgit v1.2.3 From e19a6ee2460bdd0d0055a6029383422773f9999a Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 20 Jun 2016 18:28:01 +0100 Subject: arm64: kernel: Save and restore UAO and addr_limit on exception entry If we take an exception while at EL1, the exception handler inherits the original context's addr_limit and PSTATE.UAO values. To be consistent always reset addr_limit and PSTATE.UAO on (re-)entry to EL1. This prevents accidental re-use of the original context's addr_limit. Based on a similar patch for arm from Russell King. 
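For readers unfamiliar with the thread addr_limit, the situation guarded against here is the long-standing set_fs() idiom, where the fault-checked user-access helpers are briefly pointed at kernel memory. A minimal, hypothetical in-kernel sketch (names and includes are illustrative only, not part of this patch):

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * Hypothetical helper, for illustration only: read a kernel buffer through
 * the user-access routines by temporarily widening addr_limit.  An
 * exception taken between set_fs(KERNEL_DS) and the restore would, before
 * this patch, run its handler with the widened limit still in place.
 */
static int read_via_uaccess(void *dst, const void *kernel_src, size_t len)
{
	mm_segment_t old_fs = get_fs();
	unsigned long not_copied;

	set_fs(KERNEL_DS);
	not_copied = __copy_from_user(dst, (const void __user *)kernel_src, len);
	set_fs(old_fs);

	return not_copied ? -EFAULT : 0;
}

If an exception is taken inside that window, the handler previously inherited KERNEL_DS; the entry.S hunk below saves the caller's limit and resets it to USER_DS on every entry to EL1.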
Cc: # 4.6- Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 19 +++++++++++++++++-- arch/arm64/mm/fault.c | 3 ++- 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6e7fa8..7f94755089e2 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -117,6 +117,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47f0880..2f4ba774488a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,6 +60,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2bcb3f9..6c3b7345a6c4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -406,7 +422,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 013e2cbe7924..b1166d1e5955 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -280,7 +280,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) -- cgit v1.2.3 From 47c459beabe969c6751e2ea8d1f85c5fa1652d6c Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Thu, 7 Jul 2016 10:18:17 +0530 Subject: arm64: Enable workaround for Cavium erratum 27456 on thunderx-81xx Cavium erratum 27456 commit 104a0c02e8b1 ("arm64: Add workaround for Cavium erratum 27456") is applicable for thunderx-81xx pass1.0 SoC as well. Adding code to enable to 81xx. 
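The MIDR_RANGE() entry added here matches only variant 0, revision 0 of the T81 part, i.e. pass 1.0 silicon. As a rough, self-contained illustration of that matching scheme, with masks mirroring the MIDR_EL1 field layout (the constants and helper are local to this sketch, not kernel API):

#include <stdbool.h>
#include <stdint.h>

/* Implementer [31:24], architecture [19:16] and part number [15:4] must
 * match exactly; variant [23:20] plus revision [3:0] must fall inside the
 * erratum's [rv_min, rv_max] window. */
#define EX_MIDR_MODEL_MASK	0xff0ffff0u
#define EX_MIDR_VAR_REV_MASK	0x00f0000fu

static bool ex_midr_in_range(uint32_t midr, uint32_t model,
			     uint32_t rv_min, uint32_t rv_max)
{
	uint32_t rv = midr & EX_MIDR_VAR_REV_MASK;

	if ((midr & EX_MIDR_MODEL_MASK) != (model & EX_MIDR_MODEL_MASK))
		return false;

	return rv >= rv_min && rv <= rv_max;
}

With rv_min == rv_max == 0x00, as in the new table entry, only the pass 1.0 part triggers the workaround.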
Signed-off-by: Ganapatrao Kulkarni Reviewed-by: Andrew Pinski Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985f3be8..9d9fd4b9a72e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d42789499f17..af716b65110d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -98,6 +98,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } -- cgit v1.2.3 From e99a0745bdf8a5f7e3126a686846af4aeb852cc9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Jul 2016 23:09:07 +0300 Subject: x86/pci, x86/platform/intel_mid_pci: Remove duplicate power off code Intel MID platforms (Moorestown, Medfield, Clovertrail, Merrifield) are sharing the code in the intel_mid_pci.c module. There is no need to power off specific Moorestown devices after the following commit: 5823d0893ec2 ("x86/platform/intel-mid: Add Power Management Unit driver") ... because the condition in mrfld_power_off_dev() is true for any platform from the above list. Remove duplicate power off certain devices on Intel Moorestown and rename the affected functions to show that they are applied to any of Intel MID platforms. 
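The key observation is that a DECLARE_PCI_FIXUP_FINAL() entry registered with PCI_ANY_ID already runs for every Intel device the per-ID entries named, so those entries only executed the same handler a second time. A minimal sketch of the quirk pattern (the handler name and body are illustrative, not taken from the file):

#include <linux/pci.h>

/* Runs once per matching device after normal enumeration; with PCI_ANY_ID
 * it matches every Intel device, so no per-device-ID registration of the
 * same handler is needed. */
static void example_d3hot_quirk(struct pci_dev *pdev)
{
	u16 pmcsr;

	if (!pdev->pm_cap)
		return;

	pci_read_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, &pmcsr);
	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
	pmcsr |= PCI_D3hot;
	pci_write_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, example_d3hot_quirk);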
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467749348-100518-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index a9710433be4d..5413d6a9817c 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -316,7 +316,7 @@ static void pci_d3delay_fixup(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup); -static void mid_power_off_dev(struct pci_dev *dev) +static void mid_power_off_one_device(struct pci_dev *dev) { u16 pmcsr; @@ -330,12 +330,7 @@ static void mid_power_off_dev(struct pci_dev *dev) pci_set_power_state(dev, PCI_D3hot); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mid_power_off_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mid_power_off_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mid_power_off_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mid_power_off_dev); - -static void mrfld_power_off_dev(struct pci_dev *dev) +static void mid_power_off_devices(struct pci_dev *dev) { int id; @@ -350,10 +345,10 @@ static void mrfld_power_off_dev(struct pci_dev *dev) * This sets only PMCSR bits. The actual power off will happen in * arch/x86/platform/intel-mid/pwr.c. */ - mid_power_off_dev(dev); + mid_power_off_one_device(dev); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mrfld_power_off_dev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, mid_power_off_devices); /* * Langwell devices reside at fixed offsets, don't try to move them. -- cgit v1.2.3 From ca22312dc840065206285626829ceed8bb4df88c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Jul 2016 23:09:08 +0300 Subject: x86/platform/intel-mid: Extend PWRMU to support Penwell Intel Penwell is one of the first SoCs in Intel MID series. It has slightly older version of PWRMU IP, though it is compatible with one found on Intel Tangier. Since we are not using (yet) any advanced stuff in the driver we may safely re-use what it's done for Intel Tangier for now. Extend PWRMU driver to support Intel Penwell by adding PCI ID and re-using existing ->set_initial_state() function. 
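What makes this a near one-line change is the .driver_data pointer in the PCI ID table: each supported SoC points at a structure of callbacks, so a new SoC that can reuse an existing callback only needs another table entry. A generic sketch of that pattern (structure and function names are illustrative; the two device IDs are the Penwell and Tangier IDs from the patch):

#include <linux/module.h>
#include <linux/pci.h>

struct example_pwr_info {
	int (*set_initial_state)(struct pci_dev *pdev);
};

static int example_set_initial_state(struct pci_dev *pdev)
{
	dev_info(&pdev->dev, "setting initial power state\n");
	return 0;
}

static const struct example_pwr_info example_info = {
	.set_initial_state = example_set_initial_state,
};

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	const struct example_pwr_info *info =
		(const struct example_pwr_info *)id->driver_data;

	return info->set_initial_state(pdev);
}

static const struct pci_device_id example_pwr_ids[] = {
	{ PCI_VDEVICE(INTEL, 0x0828), (kernel_ulong_t)&example_info },
	{ PCI_VDEVICE(INTEL, 0x11a1), (kernel_ulong_t)&example_info },
	{ }
};
MODULE_DEVICE_TABLE(pci, example_pwr_ids);

static struct pci_driver example_pwr_driver = {
	.name		= "example_pwr",
	.id_table	= example_pwr_ids,
	.probe		= example_probe,
};
builtin_pci_driver(example_pwr_driver);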
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467749348-100518-2-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/pwr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c index 59faf05d23f5..5bc90dd102d4 100644 --- a/arch/x86/platform/intel-mid/pwr.c +++ b/arch/x86/platform/intel-mid/pwr.c @@ -75,6 +75,7 @@ #define LSS_PWS_BITS 2 /* power state width */ /* Supported device IDs */ +#define PCI_DEVICE_ID_PENWELL 0x0828 #define PCI_DEVICE_ID_TANGIER 0x11a1 struct mid_pwr_dev { @@ -354,7 +355,7 @@ static int mid_pwr_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; } -static int tng_set_initial_state(struct mid_pwr *pwr) +static int mid_set_initial_state(struct mid_pwr *pwr) { unsigned int i, j; int ret; @@ -397,12 +398,13 @@ static int tng_set_initial_state(struct mid_pwr *pwr) return 0; } -static const struct mid_pwr_device_info tng_info = { - .set_initial_state = tng_set_initial_state, +static const struct mid_pwr_device_info mid_info = { + .set_initial_state = mid_set_initial_state, }; static const struct pci_device_id mid_pwr_pci_ids[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&mid_info }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info }, {} }; MODULE_DEVICE_TABLE(pci, mid_pwr_pci_ids); -- cgit v1.2.3 From e81e11bc71573709352a5275e175a4b2ee1325e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 5 Jul 2016 03:14:50 +0300 Subject: x86/platform/intel-mid: Enable spidev on Intel Edison boards Intel Edison board provides one of the SPI bus for user's connected devices. Append platform data to get spidev enumerated over it. 
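Once the board file below has enumerated the device, user space drives it through the spidev character device with the standard ioctls. A small user-space example for context (the /dev node name and transfer parameters are illustrative; on Edison the actual bus and chip-select numbers come from the SFI tables):

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/spi/spidev.h>

/* Performs one full-duplex 2-byte transfer and returns the second
 * received byte, or -1 on error. */
static int spidev_xfer_example(const char *node)
{
	uint8_t tx[2] = { 0x9f, 0x00 }, rx[2] = { 0, 0 };
	struct spi_ioc_transfer xfer;
	int ret, fd = open(node, O_RDWR);

	if (fd < 0)
		return -1;

	memset(&xfer, 0, sizeof(xfer));
	xfer.tx_buf = (unsigned long)tx;
	xfer.rx_buf = (unsigned long)rx;
	xfer.len = sizeof(tx);
	xfer.speed_hz = 1000000;
	xfer.bits_per_word = 8;

	ret = ioctl(fd, SPI_IOC_MESSAGE(1), &xfer);
	close(fd);

	return ret < 0 ? -1 : rx[1];
}

A caller would pass the node created for the enumerated chip select, e.g. spidev_xfer_example("/dev/spidev5.1").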
Signed-off-by: Andy Shevchenko Cc: Dan O'Donovan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467677690-90007-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 2 + .../intel-mid/device_libs/platform_spidev.c | 50 ++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_spidev.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 79e97ed5be5b..fc135bf70511 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -10,6 +10,8 @@ obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o +# SPI Devices +obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o # I2C Devices obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c new file mode 100644 index 000000000000..30c601b399ee --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_spidev.c @@ -0,0 +1,50 @@ +/* + * spidev platform data initilization file + * + * (C) Copyright 2014, 2016 Intel Corporation + * Authors: Andy Shevchenko + * Dan O'Donovan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include + +#include + +#define MRFLD_SPI_DEFAULT_DMA_BURST 8 +#define MRFLD_SPI_DEFAULT_TIMEOUT 500 + +/* GPIO pin for spidev chipselect */ +#define MRFLD_SPIDEV_GPIO_CS 111 + +static struct pxa2xx_spi_chip spidev_spi_chip = { + .dma_burst_size = MRFLD_SPI_DEFAULT_DMA_BURST, + .timeout = MRFLD_SPI_DEFAULT_TIMEOUT, + .gpio_cs = MRFLD_SPIDEV_GPIO_CS, +}; + +static void __init *spidev_platform_data(void *info) +{ + struct spi_board_info *spi_info = info; + + spi_info->mode = SPI_MODE_0; + spi_info->controller_data = &spidev_spi_chip; + + return NULL; +} + +static const struct devs_id spidev_dev_id __initconst = { + .name = "spidev", + .type = SFI_DEV_TYPE_SPI, + .delay = 0, + .get_platform_data = &spidev_platform_data, +}; + +sfi_device(spidev_dev_id); -- cgit v1.2.3 From 955d1427a91b18f53e082bd7c19c40ce13b0a0f4 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Fri, 8 Jul 2016 11:09:38 +0200 Subject: x86/mce/AMD: Increase size of the bank_map type Change bank_map type from 'char' to 'int' since we now have more than eight banks in a system. Signed-off-by: Aravind Gopalakrishnan Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1467968983-4874-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 10b0661651e0..7b7f3be783d4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -93,7 +93,7 @@ const char * const amd_df_mcablock_names[] = { EXPORT_SYMBOL_GPL(amd_df_mcablock_names); static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); -static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ +static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); -- cgit v1.2.3 From 340e983ab8afd02b59d698dd1365d7773bf136b3 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 8 Jul 2016 11:09:39 +0200 Subject: x86/RAS/AMD: Reduce the number of IPIs when prepping error injection We currently use wrmsr_on_cpu() 4 times when prepping for an error injection. This will generate 4 IPIs for each MSR write. We can reduce the number of IPIs to 1 by grouping the MSR writes and executing them serially on the appropriate CPU. Suggested-by: Borislav Petkov Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1467968983-4874-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/ras/mce_amd_inj.c | 58 ++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index e69f4701a076..1104515d5ad2 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -241,6 +241,31 @@ static void toggle_nb_mca_mst_cpu(u16 nid) __func__, PCI_FUNC(F3->devfn), NBCFG); } +static void prepare_msrs(void *info) +{ + struct mce i_mce = *(struct mce *)info; + u8 b = i_mce.bank; + + wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); + + if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (i_mce.inject_flags == DFR_INT_INJ) { + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); + } else { + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); + } + + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); + } else { + wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); + wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); + wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); + } + +} + static void do_inject(void) { u64 mcg_status = 0; @@ -287,36 +312,9 @@ static void do_inject(void) toggle_hw_mce_inject(cpu, true); - wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, - (u32)mcg_status, (u32)(mcg_status >> 32)); - - if (boot_cpu_has(X86_FEATURE_SMCA)) { - if (inj_type == DFR_INT_INJ) { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 
32)); - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - } - - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } else { - wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), - (u32)i_mce.status, (u32)(i_mce.status >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); - - wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); - } + i_mce.mcgstatus = mcg_status; + i_mce.inject_flags = inj_type; + smp_call_function_single(cpu, prepare_msrs, &i_mce, 0); toggle_hw_mce_inject(cpu, false); -- cgit v1.2.3 From 38c54ccb2ded3e93d8a353baeb7b9e12e1b77e23 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 8 Jul 2016 11:09:41 +0200 Subject: x86/mce: Fix mce_rdmsrl() warning message The MSR address we're dumping in there should be in hex, otherwise we get funsies like: [ 0.016000] WARNING: CPU: 1 PID: 0 at arch/x86/kernel/cpu/mcheck/mce.c:428 mce_rdmsrl+0xd9/0xe0 [ 0.016000] mce: Unable to read msr -1073733631! ^^^^^^^^^^^ Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Link: http://lkml.kernel.org/r/1467968983-4874-5-git-send-email-bp@alien8.de [ Fixed capitalization of 'MSR'. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 92e5e37d97bf..58af6300992d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -425,7 +425,7 @@ static u64 mce_rdmsrl(u32 msr) } if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); + WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); /* * Return zero in case the access faulted. This should * not happen normally but can happen if the CPU does -- cgit v1.2.3 From ef16dd0c2a523d2e3975bb1bea9f5727e3e7146f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 5 Jul 2016 00:31:25 +0200 Subject: x86/dumpstack: Honor supplied @regs arg The comment suggests that show_stack(NULL, NULL) should backtrace the current context, but the code doesn't match the comment. If regs are given, start the "Stack:" hexdump at regs->sp. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467671487-10344-2-git-send-email-bp@alien8.de Link: http://lkml.kernel.org/r/efcd79bf4106d61f1cd258c2caa87f3a0618eeac.1466036668.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_32.c | 4 +++- arch/x86/kernel/dumpstack_64.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index fef917e79b9d..948d77da3881 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -96,7 +96,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, int i; if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d558a8a49016..a81e1ef73bf2 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -264,7 +264,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, * back trace for this cpu: */ if (sp == NULL) { - if (task) + if (regs) + sp = (unsigned long *)regs->sp; + else if (task) sp = (unsigned long *)task->thread.sp; else sp = (unsigned long *)&sp; -- cgit v1.2.3 From 81c2949f7fdcf8ff681326669afde24962232670 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 5 Jul 2016 00:31:27 +0200 Subject: x86/dumpstack: Add show_stack_regs() and use it Add a helper to dump supplied pt_regs and use it in the MSR exception handling code to have precise stack traces pointing to the actual function causing the MSR access exception and not the stack frame of the exception handler itself. The new output looks like this: unchecked MSR access error: RDMSR from 0xdeadbeef at rIP: 0xffffffff8102ddb6 (early_init_intel+0x16/0x3a0) 00000000756e6547 ffffffff81c03f68 ffffffff81dd0940 ffffffff81c03f10 ffffffff81d42e65 0000000001000000 ffffffff81c03f58 ffffffff81d3e5a3 0000800000000000 ffffffff81800080 ffffffffffffffff 0000000000000000 Call Trace: [] early_cpu_init+0xe7/0x136 [] setup_arch+0xa5/0x9df [] start_kernel+0x9f/0x43a [] x86_64_start_reservations+0x2f/0x31 [] x86_64_start_kernel+0x168/0x176 Signed-off-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1467671487-10344-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kdebug.h | 1 + arch/x86/kernel/dumpstack.c | 5 +++++ arch/x86/mm/extable.c | 13 ++++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index e5f5dc9787d5..1ef9d581b5d9 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_trace(struct task_struct *t, struct pt_regs *regs, unsigned long *sp, unsigned long bp); +extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, int all); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ef8017ca5ba9..d66e5ac823b2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -197,6 +197,11 @@ void show_stack(struct task_struct *task, unsigned long *sp) show_stack_log_lvl(task, NULL, sp, bp, ""); } +void show_stack_regs(struct pt_regs *regs) +{ + show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, ""); +} + static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4bb53b89f3c5..fafc771568c7 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include #include #include +#include typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *, int); @@ -46,8 +47,9 @@ EXPORT_SYMBOL(ex_handler_ext); bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { - WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n", - (unsigned int)regs->cx); + if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); /* Pretend that the read succeeded and returned 0. */ regs->ip = ex_fixup_addr(fixup); @@ -60,9 +62,10 @@ EXPORT_SYMBOL(ex_handler_rdmsr_unsafe); bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { - WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n", - (unsigned int)regs->cx, - (unsigned int)regs->dx, (unsigned int)regs->ax); + if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, + (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) + show_stack_regs(regs); /* Pretend that the write succeeded. 
*/ regs->ip = ex_fixup_addr(fixup); -- cgit v1.2.3 From 39380b80d72723282f0ea1d1bbf2294eae45013e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 8 Jul 2016 11:38:28 +0200 Subject: x86/mm/pat, /dev/mem: Remove superfluous error message Currently it's possible for broken (or malicious) userspace to flood a kernel log indefinitely with messages a-la Program dmidecode tried to access /dev/mem between f0000->100000 because range_is_allowed() is case of CONFIG_STRICT_DEVMEM being turned on dumps this information each and every time devmem_is_allowed() fails. Reportedly userspace that is able to trigger contignuous flow of these messages exists. It would be possible to rate limit this message, but that'd have a questionable value; the administrator wouldn't get information about all the failing accessess, so then the information would be both superfluous and incomplete at the same time :) Returning EPERM (which is what is actually happening) is enough indication for userspace what has happened; no need to log this particular error as some sort of special condition. Signed-off-by: Jiri Kosina Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1607081137020.24757@cbobk.fhfr.pm Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 5 +---- drivers/char/mem.c | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index fb0604f11eec..db00e3e2f3dc 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -755,11 +755,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", - current->comm, from, to - 1); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 71025c2f6bbb..d633974e7f8b 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -66,12 +66,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) u64 cursor = from; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, to); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } -- cgit v1.2.3 From b059a453b1cf1c8453c2b2ed373d3147d6264ebd Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 28 Jun 2016 14:35:38 +0300 Subject: x86/vdso: Add mremap hook to vm_special_mapping Add possibility for 32-bit user-space applications to move the vDSO mapping. Previously, when a user-space app called mremap() for the vDSO address, in the syscall return path it would land on the previous address of the vDSOpage, resulting in segmentation violation. Now it lands fine and returns to userspace with a remapped vDSO. This will also fix the context.vdso pointer for 64-bit, which does not affect the user of vDSO after mremap() currently, but this may change in the future. As suggested by Andy, return -EINVAL for mremap() that would split the vDSO image: that operation cannot possibly result in a working system so reject it. 
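A minimal user-space sketch of the operation this enables: locate the [vdso] mapping in /proc/self/maps and relocate it with MREMAP_MAYMOVE | MREMAP_FIXED. As written this is the x86-64 case, where subsequent syscalls do not go through the vDSO entry point; for 32-bit glibc binaries the relinking caveat noted further below still applies. Error handling is minimal and the maps parsing is simplified:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	unsigned long start = 0, end = 0;
	char line[256];
	FILE *maps = fopen("/proc/self/maps", "r");

	if (!maps)
		return 1;

	while (fgets(line, sizeof(line), maps)) {
		if (strstr(line, "[vdso]")) {
			sscanf(line, "%lx-%lx", &start, &end);
			break;
		}
	}
	fclose(maps);
	if (!start)
		return 1;

	/* Reserve a destination, then move the whole image onto it.  A
	 * size-changing remap is rejected with -EINVAL by the new hook. */
	void *dst = mmap(NULL, end - start, PROT_NONE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED)
		return 1;

	if (mremap((void *)start, end - start, end - start,
		   MREMAP_MAYMOVE | MREMAP_FIXED, dst) == MAP_FAILED)
		return 1;

	puts("vdso relocated; still able to make syscalls");
	return 0;
}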
Renamed and moved the text_mapping structure declaration inside map_vdso(), as it used only there and now it complements the vvar_mapping variable. There is still a problem for remapping the vDSO in glibc applications: the linker relocates addresses for syscalls on the vDSO page, so you need to relink with the new addresses. Without that the next syscall through glibc may fail: Program received signal SIGSEGV, Segmentation fault. #0 0xf7fd9b80 in __kernel_vsyscall () #1 0xf7ec8238 in _exit () from /usr/lib32/libc.so.6 Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160628113539.13606-2-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vma.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/mm_types.h | 3 +++ mm/mmap.c | 10 ++++++++++ 3 files changed, 55 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index ab220ac9b3b9..3329844e3c43 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -97,10 +98,40 @@ static int vdso_fault(const struct vm_special_mapping *sm, return 0; } -static const struct vm_special_mapping text_mapping = { - .name = "[vdso]", - .fault = vdso_fault, -}; +static void vdso_fix_landing(const struct vdso_image *image, + struct vm_area_struct *new_vma) +{ +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + if (in_ia32_syscall() && image == &vdso_image_32) { + struct pt_regs *regs = current_pt_regs(); + unsigned long vdso_land = image->sym_int80_landing_pad; + unsigned long old_land_addr = vdso_land + + (unsigned long)current->mm->context.vdso; + + /* Fixing userspace landing - look at do_fast_syscall_32 */ + if (regs->ip == old_land_addr) + regs->ip = new_vma->vm_start + vdso_land; + } +#endif +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + const struct vdso_image *image = current->mm->context.vdso_image; + + if (image->size != new_size) + return -EINVAL; + + if (WARN_ON_ONCE(current->mm != new_vma->vm_mm)) + return -EFAULT; + + vdso_fix_landing(image, new_vma); + current->mm->context.vdso = (void __user *)new_vma->vm_start; + + return 0; +} static int vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) @@ -151,6 +182,12 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) struct vm_area_struct *vma; unsigned long addr, text_start; int ret = 0; + + static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, + }; static const struct vm_special_mapping vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, @@ -185,7 +222,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) image->size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &text_mapping); + &vdso_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ca3e517980a0..917f2b6a0cde 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -594,6 +594,9 @@ struct vm_special_mapping { int (*fault)(const struct 
vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf); + + int (*mremap)(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma); }; enum tlb_flush_reason { diff --git a/mm/mmap.c b/mm/mmap.c index de2c1769cc68..234edffec1d0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2943,9 +2943,19 @@ static const char *special_mapping_name(struct vm_area_struct *vma) return ((struct vm_special_mapping *)vma->vm_private_data)->name; } +static int special_mapping_mremap(struct vm_area_struct *new_vma) +{ + struct vm_special_mapping *sm = new_vma->vm_private_data; + + if (sm->mremap) + return sm->mremap(sm, new_vma); + return 0; +} + static const struct vm_operations_struct special_mapping_vmops = { .close = special_mapping_close, .fault = special_mapping_fault, + .mremap = special_mapping_mremap, .name = special_mapping_name, }; -- cgit v1.2.3 From 6daa2ec0b3e3808c55329d12de3c157cf38b17b0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 1 Jul 2016 15:34:40 +0800 Subject: x86/KASLR: Fix boot crash with certain memory configurations Ye Xiaolong reported this boot crash: | | XZ-compressed data is corrupt | | -- System halted | Fix the bug in mem_avoid_overlap() of finding the earliest overlap. Reported-and-tested-by: Ye Xiaolong Signed-off-by: Baoquan He Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 749c9e00c674..010ea16e5f77 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -285,6 +285,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &mem_avoid[i]) && mem_avoid[i].start < earliest) { *overlap = mem_avoid[i]; + earliest = overlap->start; is_overlapping = true; } } @@ -299,6 +300,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { *overlap = avoid; + earliest = overlap->start; is_overlapping = true; } -- cgit v1.2.3 From 9a7e7b571826c4399aa639af4a670642d96d935c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 8 Jul 2016 16:01:48 +0200 Subject: x86/asm/entry: Make thunk's restore a local label No need to have it appear in objdump output. No functionality change. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160708141016.GH3808@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/entry/thunk_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 027aec4a74df..627ecbcb2e62 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -33,7 +33,7 @@ .endif call \func - jmp restore + jmp .L_restore _ASM_NOKPROBE(\name) .endm @@ -54,7 +54,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) -restore: +.L_restore: popq %r11 popq %r10 popq %r9 @@ -66,5 +66,5 @@ restore: popq %rdi popq %rbp ret - _ASM_NOKPROBE(restore) + _ASM_NOKPROBE(.L_restore) #endif -- cgit v1.2.3 From d899a7d146a2ed8a7e6c2f61bcd232908bcbaabc Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:46:58 -0700 Subject: x86/mm: Refactor KASLR entropy functions Move the KASLR entropy functions into arch/x86/lib to be used in early kernel boot for KASLR memory randomization. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 76 +++------------------------------ arch/x86/include/asm/kaslr.h | 6 +++ arch/x86/lib/Makefile | 1 + arch/x86/lib/kaslr.c | 90 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 71 deletions(-) create mode 100644 arch/x86/include/asm/kaslr.h create mode 100644 arch/x86/lib/kaslr.c (limited to 'arch') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 010ea16e5f77..a66854d99ee1 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -12,10 +12,6 @@ #include "misc.h" #include "error.h" -#include -#include -#include - #include #include #include @@ -26,26 +22,6 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; -#define I8254_PORT_CONTROL 0x43 -#define I8254_PORT_COUNTER0 0x40 -#define I8254_CMD_READBACK 0xC0 -#define I8254_SELECT_COUNTER0 0x02 -#define I8254_STATUS_NOTREADY 0x40 -static inline u16 i8254(void) -{ - u16 status, timer; - - do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); - status = inb(I8254_PORT_COUNTER0); - timer = inb(I8254_PORT_COUNTER0); - timer |= inb(I8254_PORT_COUNTER0) << 8; - } while (status & I8254_STATUS_NOTREADY); - - return timer; -} - static unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { @@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area, } /* Attempt to create a simple but 
unpredictable starting entropy. */ -static unsigned long get_random_boot(void) +static unsigned long get_boot_seed(void) { unsigned long hash = 0; @@ -72,50 +48,8 @@ static unsigned long get_random_boot(void) return hash; } -static unsigned long get_random_long(const char *purpose) -{ -#ifdef CONFIG_X86_64 - const unsigned long mix_const = 0x5d6008cbf3848dd3UL; -#else - const unsigned long mix_const = 0x3f39e593UL; -#endif - unsigned long raw, random = get_random_boot(); - bool use_i8254 = true; - - debug_putstr(purpose); - debug_putstr(" KASLR using"); - - if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr(" RDRAND"); - if (rdrand_long(&raw)) { - random ^= raw; - use_i8254 = false; - } - } - - if (has_cpuflag(X86_FEATURE_TSC)) { - debug_putstr(" RDTSC"); - raw = rdtsc(); - - random ^= raw; - use_i8254 = false; - } - - if (use_i8254) { - debug_putstr(" i8254"); - random ^= i8254(); - } - - /* Circular multiply for better bit diffusion */ - asm("mul %3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; - - debug_putstr("...\n"); - - return random; -} +#define KASLR_COMPRESSED_BOOT +#include "../../lib/kaslr.c" struct mem_vector { unsigned long start; @@ -349,7 +283,7 @@ static unsigned long slots_fetch_random(void) if (slot_max == 0) return 0; - slot = get_random_long("Physical") % slot_max; + slot = kaslr_get_random_long("Physical") % slot_max; for (i = 0; i < slot_area_index; i++) { if (slot >= slot_areas[i].num) { @@ -479,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum, slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN + 1; - random_addr = get_random_long("Virtual") % slots; + random_addr = kaslr_get_random_long("Virtual") % slots; return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; } diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h new file mode 100644 index 000000000000..5547438db5ea --- /dev/null +++ b/arch/x86/include/asm/kaslr.h @@ -0,0 +1,6 @@ +#ifndef _ASM_KASLR_H_ +#define _ASM_KASLR_H_ + +unsigned long kaslr_get_random_long(const char *purpose); + +#endif diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a576752a7e..cfa6d076f4f2 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -24,6 +24,7 @@ lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o +lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += msr.o msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c new file mode 100644 index 000000000000..f7dfeda83e5c --- /dev/null +++ b/arch/x86/lib/kaslr.c @@ -0,0 +1,90 @@ +/* + * Entropy functions used on early boot for KASLR base and memory + * randomization. The base randomization is done in the compressed + * kernel and memory randomization is done early when the regular + * kernel starts. This file is included in the compressed kernel and + * normally linked in the regular. + */ +#include +#include +#include +#include +#include + +/* + * When built for the regular kernel, several functions need to be stubbed out + * or changed to their regular kernel equivalent. 
+ */ +#ifndef KASLR_COMPRESSED_BOOT +#include +#include + +#define debug_putstr(v) early_printk(v) +#define has_cpuflag(f) boot_cpu_has(f) +#define get_boot_seed() kaslr_offset() +#endif + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +unsigned long kaslr_get_random_long(const char *purpose) +{ +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif + unsigned long raw, random = get_boot_seed(); + bool use_i8254 = true; + + debug_putstr(purpose); + debug_putstr(" KASLR using"); + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + debug_putstr(" RDTSC"); + raw = rdtsc(); + + random ^= raw; + use_i8254 = false; + } + + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + + debug_putstr("...\n"); + + return random; +} -- cgit v1.2.3 From 59b3d0206d74a700069e49160e8194b2ca93b703 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:46:59 -0700 Subject: x86/mm: Update physical mapping variable names Change the variable names in kernel_physical_mapping_init() and related functions to correctly reflect physical and virtual memory addresses. Also add comments on each function to describe usage and alignment constraints. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 162 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 96 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bce2e5d9edd4..6714712bd5da 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -328,22 +328,30 @@ void __init cleanup_highmap(void) } } +/* + * Create PTE level page table mapping for physical addresses. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, +phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end, pgprot_t prot) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; + pte_t *pte; int i; - pte_t *pte = pte_page + pte_index(addr); + pte = pte_page + pte_index(paddr); + i = pte_index(paddr); - for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { - next = (addr & PAGE_MASK) + PAGE_SIZE; - if (addr >= end) { + for (; i < PTRS_PER_PTE; i++, paddr = paddr_next, pte++) { + paddr_next = (paddr & PAGE_MASK) + PAGE_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && - !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PAGE_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PAGE_MASK, paddr_next, + E820_RESERVED_KERN)) set_pte(pte, __pte(0)); continue; } @@ -361,37 +369,44 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, } if (0) - printk(" pte=%p addr=%lx pte=%016lx\n", - pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + pr_info(" pte=%p addr=%lx pte=%016lx\n", pte, paddr, + pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; - set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); - last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); + paddr_last = (paddr & PAGE_MASK) + PAGE_SIZE; } update_page_count(PG_LEVEL_4K, pages); - return last_map_addr; + return paddr_last; } +/* + * Create PMD level page table mapping for physical addresses. The virtual + * and physical address have to be aligned at this level. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, +phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, pgprot_t prot) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; - int i = pmd_index(address); + int i = pmd_index(paddr); - for (; i < PTRS_PER_PMD; i++, address = next) { - pmd_t *pmd = pmd_page + pmd_index(address); + for (; i < PTRS_PER_PMD; i++, paddr = paddr_next) { + pmd_t *pmd = pmd_page + pmd_index(paddr); pte_t *pte; pgprot_t new_prot = prot; - next = (address & PMD_MASK) + PMD_SIZE; - if (address >= end) { + paddr_next = (paddr & PMD_MASK) + PMD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && - !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PMD_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PMD_MASK, paddr_next, + E820_RESERVED_KERN)) set_pmd(pmd, __pmd(0)); continue; } @@ -400,8 +415,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); - last_map_addr = phys_pte_init(pte, address, - end, prot); + paddr_last = phys_pte_init(pte, paddr, + paddr_end, prot); spin_unlock(&init_mm.page_table_lock); continue; } @@ -420,7 +435,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (page_size_mask & (1 << PG_LEVEL_2M)) { if (!after_bootmem) pages++; - last_map_addr = next; + paddr_last = paddr_next; continue; } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); @@ -430,42 +445,49 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pmd, - pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, + pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT, __pgprot(pgprot_val(prot) | _PAGE_PSE))); spin_unlock(&init_mm.page_table_lock); - last_map_addr = next; + paddr_last = paddr_next; continue; } pte = alloc_low_page(); - last_map_addr = phys_pte_init(pte, address, end, new_prot); + paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot); spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, pte); spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); - return last_map_addr; + return paddr_last; } +/* + * Create PUD level page table mapping for physical addresses. The virtual + * and physical address have to be aligned at this level. + * It returns the last physical address mapped. 
+ */ static unsigned long __meminit -phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, - unsigned long page_size_mask) +phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, + unsigned long page_size_mask) { - unsigned long pages = 0, next; - unsigned long last_map_addr = end; - int i = pud_index(addr); + unsigned long pages = 0, paddr_next; + unsigned long paddr_last = paddr_end; + int i = pud_index(paddr); - for (; i < PTRS_PER_PUD; i++, addr = next) { - pud_t *pud = pud_page + pud_index(addr); + for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { + pud_t *pud = pud_page + pud_index(paddr); pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; - next = (addr & PUD_MASK) + PUD_SIZE; - if (addr >= end) { + paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && - !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && - !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) + !e820_any_mapped(paddr & PUD_MASK, paddr_next, + E820_RAM) && + !e820_any_mapped(paddr & PUD_MASK, paddr_next, + E820_RESERVED_KERN)) set_pud(pud, __pud(0)); continue; } @@ -473,8 +495,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (pud_val(*pud)) { if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); - last_map_addr = phys_pmd_init(pmd, addr, end, - page_size_mask, prot); + paddr_last = phys_pmd_init(pmd, paddr, + paddr_end, + page_size_mask, + prot); __flush_tlb_all(); continue; } @@ -493,7 +517,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (page_size_mask & (1 << PG_LEVEL_1G)) { if (!after_bootmem) pages++; - last_map_addr = next; + paddr_last = paddr_next; continue; } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); @@ -503,16 +527,16 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pud, - pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, + pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); spin_unlock(&init_mm.page_table_lock); - last_map_addr = next; + paddr_last = paddr_next; continue; } pmd = alloc_low_page(); - last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, - prot); + paddr_last = phys_pmd_init(pmd, paddr, paddr_end, + page_size_mask, prot); spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, pmd); @@ -522,38 +546,44 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, update_page_count(PG_LEVEL_1G, pages); - return last_map_addr; + return paddr_last; } +/* + * Create page table mapping for the physical memory for specific physical + * addresses. The virtual and physical addresses have to be aligned on PUD level + * down. It returns the last physical address mapped. 
+ */ unsigned long __meminit -kernel_physical_mapping_init(unsigned long start, - unsigned long end, +kernel_physical_mapping_init(unsigned long paddr_start, + unsigned long paddr_end, unsigned long page_size_mask) { bool pgd_changed = false; - unsigned long next, last_map_addr = end; - unsigned long addr; + unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last; - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); - addr = start; + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr_start); + vaddr_end = (unsigned long)__va(paddr_end); + vaddr_start = vaddr; - for (; start < end; start = next) { - pgd_t *pgd = pgd_offset_k(start); + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + pgd_t *pgd = pgd_offset_k(vaddr); pud_t *pud; - next = (start & PGDIR_MASK) + PGDIR_SIZE; + vaddr_next = (vaddr & PGDIR_MASK) + PGDIR_SIZE; if (pgd_val(*pgd)) { pud = (pud_t *)pgd_page_vaddr(*pgd); - last_map_addr = phys_pud_init(pud, __pa(start), - __pa(end), page_size_mask); + paddr_last = phys_pud_init(pud, __pa(vaddr), + __pa(vaddr_end), + page_size_mask); continue; } pud = alloc_low_page(); - last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), - page_size_mask); + paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end), + page_size_mask); spin_lock(&init_mm.page_table_lock); pgd_populate(&init_mm, pgd, pud); @@ -562,11 +592,11 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end - 1, 0); + sync_global_pgds(vaddr_start, vaddr_end - 1, 0); __flush_tlb_all(); - return last_map_addr; + return paddr_last; } #ifndef CONFIG_NUMA -- cgit v1.2.3 From faa379332f3cb3375db1849e27386f8bc9b97da4 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:00 -0700 Subject: x86/mm: Add PUD VA support for physical mapping Minor change that allows early boot physical mapping of PUD level virtual addresses. The current implementation expects the virtual address to be PUD aligned. For KASLR memory randomization, we need to be able to randomize the offset used on the PUD table. It has no impact on current usage. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6714712bd5da..7bf1ddb54537 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -465,7 +465,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, /* * Create PUD level page table mapping for physical addresses. The virtual - * and physical address have to be aligned at this level. 
+ * and physical address do not have to be aligned at this level. KASLR can + * randomize virtual addresses up to this level. * It returns the last physical address mapped. */ static unsigned long __meminit @@ -474,14 +475,18 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, { unsigned long pages = 0, paddr_next; unsigned long paddr_last = paddr_end; - int i = pud_index(paddr); + unsigned long vaddr = (unsigned long)__va(paddr); + int i = pud_index(vaddr); for (; i < PTRS_PER_PUD; i++, paddr = paddr_next) { - pud_t *pud = pud_page + pud_index(paddr); + pud_t *pud; pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; + vaddr = (unsigned long)__va(paddr); + pud = pud_page + pud_index(vaddr); paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + if (paddr >= paddr_end) { if (!after_bootmem && !e820_any_mapped(paddr & PUD_MASK, paddr_next, @@ -551,7 +556,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, /* * Create page table mapping for the physical memory for specific physical - * addresses. The virtual and physical addresses have to be aligned on PUD level + * addresses. The virtual and physical addresses have to be aligned on PMD level * down. It returns the last physical address mapped. */ unsigned long __meminit -- cgit v1.2.3 From b234e8a09003af108d3573f0369e25c080676b14 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:01 -0700 Subject: x86/mm: Separate variable for trampoline PGD Use a separate global variable to define the trampoline PGD used to start other processors. This change will allow KALSR memory randomization to change the trampoline PGD to be correctly aligned with physical memory. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 12 ++++++++++++ arch/x86/mm/init.c | 3 +++ arch/x86/realmode/init.c | 5 ++++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1a27396b6ea0..d455bef39e9c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -729,6 +729,18 @@ extern int direct_gbpages; void init_mem_mapping(void); void early_alloc_pgt_buf(void); +#ifdef CONFIG_X86_64 +/* Realmode trampoline initialization. 
*/ +extern pgd_t trampoline_pgd_entry; +static inline void __meminit init_trampoline(void) +{ + /* Default trampoline pgd value */ + trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; +} +#else +static inline void init_trampoline(void) { } +#endif + /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 372aad2b3291..4252acdfcbbd 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -590,6 +590,9 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); + /* Init the trampoline, possibly with KASLR memory offset */ + init_trampoline(); + /* * If the allocation is in bottom-up direction, we setup direct mapping * in bottom-up, otherwise we setup direct mapping in top-down. diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 0b7a63d98440..705e3fffb4a1 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -8,6 +8,9 @@ struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; +/* Hold the pgd entry used on booting additional CPUs */ +pgd_t trampoline_pgd_entry; + void __init reserve_real_mode(void) { phys_addr_t mem; @@ -84,7 +87,7 @@ void __init setup_real_mode(void) *trampoline_cr4_features = __read_cr4(); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); - trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + trampoline_pgd[0] = trampoline_pgd_entry.pgd; trampoline_pgd[511] = init_level4_pgt[511].pgd; #endif } -- cgit v1.2.3 From 0483e1fa6e09d4948272680f691dccb1edb9677f Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:02 -0700 Subject: x86/mm: Implement ASLR for kernel memory regions Randomizes the virtual address space of kernel memory regions for x86_64. This first patch adds the infrastructure and does not randomize any region. The following patches will randomize the physical memory mapping, vmalloc and vmemmap regions. This security feature mitigates exploits relying on predictable kernel addresses. These addresses can be used to disclose the kernel modules base addresses or corrupt specific structures to elevate privileges bypassing the current implementation of KASLR. This feature can be enabled with the CONFIG_RANDOMIZE_MEMORY option. The order of each memory region is not changed. The feature looks at the available space for the regions based on different configuration options and randomizes the base and space between each. The size of the physical memory mapping is the available physical memory. No performance impact was detected while testing the feature. Entropy is generated using the KASLR early boot functions now shared in the lib directory (originally written by Kees Cook). Randomization is done on PGD & PUD page table levels to increase possible addresses. The physical memory mapping code was adapted to support PUD level virtual addresses. This implementation on the best configuration provides 30,000 possible virtual addresses in average for each memory region. An additional low memory page is used to ensure each CPU can start with a PGD aligned virtual address (for realmode). x86/dump_pagetable was updated to correctly display each region. Updated documentation on x86_64 memory layout accordingly. 
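As a rough, illustrative sanity check of that figure (the exact numbers depend on the configuration and on how much physical memory is installed): the randomized bases are PUD aligned, and a PUD entry covers 1 GiB with 4-level paging, so a region left with roughly 30 TiB of spare virtual address space has about 30 * 1024 = 30,720 candidate base addresses, consistent with the ~30,000 average quoted above.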
Performance data, after all patches in the series: Kernbench shows almost no difference (-+ less than 1%): Before: Average Optimal load -j 12 Run (std deviation): Elapsed Time 102.63 (1.2695) User Time 1034.89 (1.18115) System Time 87.056 (0.456416) Percent CPU 1092.9 (13.892) Context Switches 199805 (3455.33) Sleeps 97907.8 (900.636) After: Average Optimal load -j 12 Run (std deviation): Elapsed Time 102.489 (1.10636) User Time 1034.86 (1.36053) System Time 87.764 (0.49345) Percent CPU 1095 (12.7715) Context Switches 199036 (4298.1) Sleeps 97681.6 (1031.11) Hackbench shows 0% difference on average (hackbench 90 repeated 10 times): attemp,before,after 1,0.076,0.069 2,0.072,0.069 3,0.066,0.066 4,0.066,0.068 5,0.066,0.067 6,0.066,0.069 7,0.067,0.066 8,0.063,0.067 9,0.067,0.065 10,0.068,0.071 average,0.0677,0.0677 Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- Documentation/x86/x86_64/mm.txt | 4 ++ arch/x86/Kconfig | 17 +++++ arch/x86/include/asm/kaslr.h | 6 ++ arch/x86/include/asm/pgtable.h | 7 +- arch/x86/kernel/setup.c | 3 + arch/x86/mm/Makefile | 1 + arch/x86/mm/dump_pagetables.c | 16 +++-- arch/x86/mm/init.c | 1 + arch/x86/mm/kaslr.c | 152 ++++++++++++++++++++++++++++++++++++++++ 9 files changed, 202 insertions(+), 5 deletions(-) create mode 100644 arch/x86/mm/kaslr.c (limited to 'arch') diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 5aa738346062..8c7dd5957ae1 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -39,4 +39,8 @@ memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. +Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all +physical memory, vmalloc/ioremap space and virtual memory map are randomized. +Their order is preserved but their base will be offset early at boot time. + -Andi Kleen, Jul 2004 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 930fe88095d3..9719b8eb38d3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1993,6 +1993,23 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. +config RANDOMIZE_MEMORY + bool "Randomize the kernel memory sections" + depends on X86_64 + depends on RANDOMIZE_BASE + default RANDOMIZE_BASE + ---help--- + Randomizes the base virtual address of kernel memory sections + (physical memory mapping, vmalloc & vmemmap). This security feature + makes exploits relying on predictable memory locations less reliable. + + The order of allocations remains unchanged. Entropy is generated in + the same way as RANDOMIZE_BASE. 
Current implementation in the optimal + configuration have in average 30,000 different possible virtual + addresses for each memory section. + + If unsure, say N. + config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 5547438db5ea..683c9d736314 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -3,4 +3,10 @@ unsigned long kaslr_get_random_long(const char *purpose); +#ifdef CONFIG_RANDOMIZE_MEMORY +void kernel_randomize_memory(void); +#else +static inline void kernel_randomize_memory(void) { } +#endif /* CONFIG_RANDOMIZE_MEMORY */ + #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d455bef39e9c..5472682a307f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -732,11 +732,16 @@ void early_alloc_pgt_buf(void); #ifdef CONFIG_X86_64 /* Realmode trampoline initialization. */ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline(void) +static inline void __meminit init_trampoline_default(void) { /* Default trampoline pgd value */ trampoline_pgd_entry = init_level4_pgt[pgd_index(__PAGE_OFFSET)]; } +# ifdef CONFIG_RANDOMIZE_MEMORY +void __meminit init_trampoline(void); +# else +# define init_trampoline init_trampoline_default +# endif #else static inline void init_trampoline(void) { } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4e7b3991b60..a2616584b6e9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -113,6 +113,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -942,6 +943,8 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); + kernel_randomize_memory(); + iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; setup_memory_map(); parse_setup_data(); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 62c0043a5fd5..96d2b847e09e 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -37,4 +37,5 @@ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o obj-$(CONFIG_X86_INTEL_MPX) += mpx.o obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 99bfb192803f..9a17250bcbe0 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -72,9 +72,9 @@ static struct addr_marker address_markers[] = { { 0, "User Space" }, #ifdef CONFIG_X86_64 { 0x8000000000000000UL, "Kernel Space" }, - { PAGE_OFFSET, "Low Kernel Mapping" }, - { VMALLOC_START, "vmalloc() Area" }, - { VMEMMAP_START, "Vmemmap" }, + { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, + { 0/* VMALLOC_START */, "vmalloc() Area" }, + { 0/* VMEMMAP_START */, "Vmemmap" }, # ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, # endif @@ -434,8 +434,16 @@ void ptdump_walk_pgd_level_checkwx(void) static int __init pt_dump_init(void) { + /* + * Various markers are not compile-time constants, so assign them + * here. 
+ */ +#ifdef CONFIG_X86_64 + address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; + address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; + address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; +#endif #ifdef CONFIG_X86_32 - /* Not a compile-time constant on x86-32 */ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; # ifdef CONFIG_HIGHMEM diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4252acdfcbbd..cc82830bc8c4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -17,6 +17,7 @@ #include #include /* for MAX_DMA_PFN */ #include +#include /* * We need to define the tracepoints somewhere, and tlb.c diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c new file mode 100644 index 000000000000..d5380a48e8fb --- /dev/null +++ b/arch/x86/mm/kaslr.c @@ -0,0 +1,152 @@ +/* + * This file implements KASLR memory randomization for x86_64. It randomizes + * the virtual address space of kernel memory regions (physical memory + * mapping, vmalloc & vmemmap) for x86_64. This security feature mitigates + * exploits relying on predictable kernel addresses. + * + * Entropy is generated using the KASLR early boot functions now shared in + * the lib directory (originally written by Kees Cook). Randomization is + * done on PGD & PUD page table levels to increase possible addresses. The + * physical memory mapping code was adapted to support PUD level virtual + * addresses. This implementation on the best configuration provides 30,000 + * possible virtual addresses in average for each memory region. An additional + * low memory page is used to ensure each CPU can start with a PGD aligned + * virtual address (for realmode). + * + * The order of each memory region is not changed. The feature looks at + * the available space for the regions based on different configuration + * options and randomizes the base and space between each. The size of the + * physical memory mapping is the available physical memory. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "mm_internal.h" + +#define TB_SHIFT 40 + +/* + * Virtual address start and end range for randomization. The end changes base + * on configuration to have the highest amount of space for randomization. + * It increases the possible random position for each randomized region. + * + * You need to add an if/def entry if you introduce a new memory region + * compatible with KASLR. Your entry must be in logical order with memory + * layout. For example, ESPFIX is before EFI because its virtual address is + * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to + * ensure that this order is correct and won't be changed. + */ +static const unsigned long vaddr_start; +static const unsigned long vaddr_end; + +/* + * Memory regions randomized by KASLR (except modules that use a separate logic + * earlier during boot). The list is ordered based on virtual addresses. This + * order is kept after randomization. + */ +static __initdata struct kaslr_memory_region { + unsigned long *base; + unsigned long size_tb; +} kaslr_regions[] = { +}; + +/* Get size in bytes used by the memory region */ +static inline unsigned long get_padding(struct kaslr_memory_region *region) +{ + return (region->size_tb << TB_SHIFT); +} + +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. 
+ */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !config_enabled(CONFIG_KASAN); +} + +/* Initialize base and padding for each memory region randomized with KASLR */ +void __init kernel_randomize_memory(void) +{ + size_t i; + unsigned long vaddr = vaddr_start; + unsigned long rand; + struct rnd_state rand_state; + unsigned long remain_entropy; + + if (!kaslr_memory_enabled()) + return; + + /* Calculate entropy available between regions */ + remain_entropy = vaddr_end - vaddr_start; + for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) + remain_entropy -= get_padding(&kaslr_regions[i]); + + prandom_seed_state(&rand_state, kaslr_get_random_long("Memory")); + + for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) { + unsigned long entropy; + + /* + * Select a random virtual address using the extra entropy + * available. + */ + entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); + prandom_bytes_state(&rand_state, &rand, sizeof(rand)); + entropy = (rand % (entropy + 1)) & PUD_MASK; + vaddr += entropy; + *kaslr_regions[i].base = vaddr; + + /* + * Jump the region and add a minimum padding based on + * randomization alignment. + */ + vaddr += get_padding(&kaslr_regions[i]); + vaddr = round_up(vaddr + 1, PUD_SIZE); + remain_entropy -= entropy; + } +} + +/* + * Create PGD aligned trampoline table to allow real mode initialization + * of additional CPUs. Consume only 1 low memory page. + */ +void __meminit init_trampoline(void) +{ + unsigned long paddr, paddr_next; + pgd_t *pgd; + pud_t *pud_page, *pud_page_tramp; + int i; + + if (!kaslr_memory_enabled()) { + init_trampoline_default(); + return; + } + + pud_page_tramp = alloc_low_page(); + + paddr = 0; + pgd = pgd_offset_k((unsigned long)__va(paddr)); + pud_page = (pud_t *) pgd_page_vaddr(*pgd); + + for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) { + pud_t *pud, *pud_tramp; + unsigned long vaddr = (unsigned long)__va(paddr); + + pud_tramp = pud_page_tramp + pud_index(paddr); + pud = pud_page + pud_index(vaddr); + paddr_next = (paddr & PUD_MASK) + PUD_SIZE; + + *pud_tramp = *pud; + } + + set_pgd(&trampoline_pgd_entry, + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); +} -- cgit v1.2.3 From 021182e52fe01c1f7b126f97fd6ba048dc4234fd Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:03 -0700 Subject: x86/mm: Enable KASLR for physical mapping memory regions Add the physical mapping in the list of randomized memory regions. The physical memory mapping holds most allocations from boot and heap allocators. Knowing the base address and physical memory size, an attacker can deduce the PDE virtual address for the vDSO memory page. This attack was demonstrated at CanSecWest 2016, in the following presentation: "Getting Physical: Extreme Abuse of Intel Based Paged Systems": https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/blob/master/Presentation/CanSec2016_Presentation.pdf (See second part of the presentation). The exploits used against Linux worked successfully against 4.6+ but fail with KASLR memory enabled: https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/tree/master/Demos/Linux/exploits Similar research was done at Google leading to this patch proposal. Variants exists to overwrite /proc or /sys objects ACLs leading to elevation of privileges. These variants were tested against 4.6+. 
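To make the effect of the change below concrete, here is a simplified sketch (illustrative helpers only, not the real asm/page.h definitions, which are more involved): once __PAGE_OFFSET resolves to the page_offset_base variable, every direct-mapping translation depends on a per-boot randomized base instead of the fixed 0xffff880000000000 constant:

	/* Illustrative only -- assumes a plain direct-mapping translation */
	extern unsigned long page_offset_base;	/* chosen early at boot */

	static inline void *sketch_va(unsigned long paddr)
	{
		/* virtual = physical + randomized direct-mapping base */
		return (void *)(paddr + page_offset_base);
	}

	static inline unsigned long sketch_pa(const void *vaddr)
	{
		return (unsigned long)vaddr - page_offset_base;
	}

An attacker who knows the physical layout can therefore no longer precompute these virtual addresses without also recovering the per-boot base.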
The page offset used by the compressed kernel retains the static value since it is not yet randomized during this boot stage. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-7-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/pagetable.c | 3 +++ arch/x86/include/asm/kaslr.h | 2 ++ arch/x86/include/asm/page_64_types.h | 11 ++++++++++- arch/x86/kernel/head_64.S | 2 +- arch/x86/mm/kaslr.c | 18 +++++++++++++++--- 5 files changed, 31 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 6e31a6aac4d3..56589d0a804b 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -20,6 +20,9 @@ /* These actually do the work of building the kernel identity maps. */ #include #include +/* Use the static base for this part of the boot process */ +#undef __PAGE_OFFSET +#define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" /* Used by pgtable.h asm code to force instruction serialization. */ diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 683c9d736314..62b1b815a83a 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -4,6 +4,8 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY +extern unsigned long page_offset_base; + void kernel_randomize_memory(void); #else static inline void kernel_randomize_memory(void) { } diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d5c2f8b40faa..9215e0527647 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,6 +1,10 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H +#ifndef __ASSEMBLY__ +#include +#endif + #ifdef CONFIG_KASAN #define KASAN_STACK_ORDER 1 #else @@ -32,7 +36,12 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires. 
*/ -#define __PAGE_OFFSET _AC(0xffff880000000000, UL) +#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) +#ifdef CONFIG_RANDOMIZE_MEMORY +#define __PAGE_OFFSET page_offset_base +#else +#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#endif /* CONFIG_RANDOMIZE_MEMORY */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c7920ba69563..9f8efc9f0075 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -38,7 +38,7 @@ #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) +L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index d5380a48e8fb..609ecf2b37ed 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -43,8 +43,12 @@ * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to * ensure that this order is correct and won't be changed. */ -static const unsigned long vaddr_start; -static const unsigned long vaddr_end; +static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; +static const unsigned long vaddr_end = VMALLOC_START; + +/* Default values */ +unsigned long page_offset_base = __PAGE_OFFSET_BASE; +EXPORT_SYMBOL(page_offset_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -55,6 +59,7 @@ static __initdata struct kaslr_memory_region { unsigned long *base; unsigned long size_tb; } kaslr_regions[] = { + { &page_offset_base, 64/* Maximum */ }, }; /* Get size in bytes used by the memory region */ @@ -77,13 +82,20 @@ void __init kernel_randomize_memory(void) { size_t i; unsigned long vaddr = vaddr_start; - unsigned long rand; + unsigned long rand, memory_tb; struct rnd_state rand_state; unsigned long remain_entropy; if (!kaslr_memory_enabled()) return; + BUG_ON(kaslr_regions[0].base != &page_offset_base); + memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT); + + /* Adapt phyiscal memory region size based on available memory */ + if (memory_tb < kaslr_regions[0].size_tb) + kaslr_regions[0].size_tb = memory_tb; + /* Calculate entropy available between regions */ remain_entropy = vaddr_end - vaddr_start; for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) -- cgit v1.2.3 From a95ae27c2ee1cba5f4f6b9dea43ffe88252e79b1 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:04 -0700 Subject: x86/mm: Enable KASLR for vmalloc memory regions Add vmalloc to the list of randomized memory regions. The vmalloc memory region contains the allocation made through the vmalloc() API. The allocations are done sequentially to prevent fragmentation and each allocation address can easily be deduced especially from boot. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-8-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kaslr.h | 1 + arch/x86/include/asm/pgtable_64_types.h | 15 +++++++++++---- arch/x86/mm/kaslr.c | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 62b1b815a83a..2674ee3de748 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -5,6 +5,7 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; void kernel_randomize_memory(void); #else diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index e6844dfb4471..6fdef9eef2d5 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -5,6 +5,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * These are used to make use of C type-checking.. @@ -53,10 +54,16 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) -#define VMALLOC_START _AC(0xffffc90000000000, UL) -#define VMALLOC_END _AC(0xffffe8ffffffffff, UL) -#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define VMALLOC_SIZE_TB _AC(32, UL) +#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +#define VMEMMAP_START _AC(0xffffea0000000000, UL) +#ifdef CONFIG_RANDOMIZE_MEMORY +#define VMALLOC_START vmalloc_base +#else +#define VMALLOC_START __VMALLOC_BASE +#endif /* CONFIG_RANDOMIZE_MEMORY */ +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 609ecf2b37ed..c939cfe1b516 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -44,11 +44,13 @@ * ensure that this order is correct and won't be changed. 
*/ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; -static const unsigned long vaddr_end = VMALLOC_START; +static const unsigned long vaddr_end = VMEMMAP_START; /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; EXPORT_SYMBOL(page_offset_base); +unsigned long vmalloc_base = __VMALLOC_BASE; +EXPORT_SYMBOL(vmalloc_base); /* * Memory regions randomized by KASLR (except modules that use a separate logic @@ -60,6 +62,7 @@ static __initdata struct kaslr_memory_region { unsigned long size_tb; } kaslr_regions[] = { { &page_offset_base, 64/* Maximum */ }, + { &vmalloc_base, VMALLOC_SIZE_TB }, }; /* Get size in bytes used by the memory region */ -- cgit v1.2.3 From 90397a41779645d3abba5599f6bb538fdcab9339 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:06 -0700 Subject: x86/mm: Add memory hotplug support for KASLR memory randomization Add a new option (CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING) to define the padding used for the physical memory mapping section when KASLR memory is enabled. It ensures there is enough virtual address space when CONFIG_MEMORY_HOTPLUG is used. The default value is 10 terabytes. If CONFIG_MEMORY_HOTPLUG is not used, no space is reserved increasing the entropy available. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-10-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 15 +++++++++++++++ arch/x86/mm/kaslr.c | 7 ++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9719b8eb38d3..703413fb233a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2010,6 +2010,21 @@ config RANDOMIZE_MEMORY If unsure, say N. +config RANDOMIZE_MEMORY_PHYSICAL_PADDING + hex "Physical memory mapping padding" if EXPERT + depends on RANDOMIZE_MEMORY + default "0xa" if MEMORY_HOTPLUG + default "0x0" + range 0x1 0x40 if MEMORY_HOTPLUG + range 0x0 0x40 + ---help--- + Define the padding in terabytes added to the existing physical + memory size during kernel memory randomization. It is useful + for memory hotplug support but reduces the entropy available for + address randomization. + + If unsure, leave at the default value. + config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index c939cfe1b516..26dccd6c0df1 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -92,8 +92,13 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; + /* + * Update Physical memory mapping to available and + * add padding if needed (especially for memory hotplug support). 
+ */ BUG_ON(kaslr_regions[0].base != &page_offset_base); - memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT); + memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) + + CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING; /* Adapt phyiscal memory region size based on available memory */ if (memory_tb < kaslr_regions[0].size_tb) -- cgit v1.2.3 From 62d855d3e725f4e4b0d2786f7cad3f0660a03a59 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 18 Jun 2016 18:51:34 +0300 Subject: x86/platform/intel-mid: Rename mrfl.c to mrfld.c Use mrfld as an abbreviation of Merrifield to be consistent with the rest of the code. In the future we are going to add more files here prefixed with 'mrfld'. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1466265094-146113-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/Makefile | 2 +- arch/x86/platform/intel-mid/mrfl.c | 100 ----------------------------------- arch/x86/platform/intel-mid/mrfld.c | 100 +++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 101 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/mrfl.c create mode 100644 arch/x86/platform/intel-mid/mrfld.c (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index aebb5b9ea80a..fa021dfab088 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o pwr.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c deleted file mode 100644 index bd1adc621781..000000000000 --- a/arch/x86/platform/intel-mid/mrfl.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * mrfl.c: Intel Merrifield platform specific setup code - * - * (C) Copyright 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include - -#include -#include - -#include "intel_mid_weak_decls.h" - -static unsigned long __init tangier_calibrate_tsc(void) -{ - unsigned long fast_calibrate; - u32 lo, hi, ratio, fsb, bus_freq; - - /* *********************** */ - /* Compute TSC:Ratio * FSB */ - /* *********************** */ - - /* Compute Ratio */ - rdmsr(MSR_PLATFORM_INFO, lo, hi); - pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); - - ratio = (lo >> 8) & 0xFF; - pr_debug("ratio is %d\n", ratio); - if (!ratio) { - pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); - ratio = 4; - } - - /* Compute FSB */ - rdmsr(MSR_FSB_FREQ, lo, hi); - pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", - hi, lo); - - bus_freq = lo & 0x7; - pr_debug("bus_freq = 0x%x\n", bus_freq); - - if (bus_freq == 0) - fsb = FSB_FREQ_100SKU; - else if (bus_freq == 1) - fsb = FSB_FREQ_100SKU; - else if (bus_freq == 2) - fsb = FSB_FREQ_133SKU; - else if (bus_freq == 3) - fsb = FSB_FREQ_167SKU; - else if (bus_freq == 4) - fsb = FSB_FREQ_83SKU; - else if (bus_freq == 5) - fsb = FSB_FREQ_400SKU; - else if (bus_freq == 6) - fsb = FSB_FREQ_267SKU; - else if (bus_freq == 7) - fsb = FSB_FREQ_333SKU; - else { - BUG(); - pr_err("Invalid bus_freq! 
Setting to minimal value!\n"); - fsb = FSB_FREQ_100SKU; - } - - /* TSC = FSB Freq * Resolved HFM Ratio */ - fast_calibrate = ratio * fsb; - pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); - - /* ************************************ */ - /* Calculate Local APIC Timer Frequency */ - /* ************************************ */ - lapic_timer_frequency = (fsb * 1000) / HZ; - - pr_debug("Setting lapic_timer_frequency = %d\n", - lapic_timer_frequency); - - /* mark tsc clocksource as reliable */ - set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); - - return fast_calibrate; -} - -static void __init tangier_arch_setup(void) -{ - x86_platform.calibrate_tsc = tangier_calibrate_tsc; -} - -/* tangier arch ops */ -static struct intel_mid_ops tangier_ops = { - .arch_setup = tangier_arch_setup, -}; - -void *get_tangier_ops(void) -{ - return &tangier_ops; -} diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c new file mode 100644 index 000000000000..59253db41bbc --- /dev/null +++ b/arch/x86/platform/intel-mid/mrfld.c @@ -0,0 +1,100 @@ +/* + * Intel Merrifield platform specific setup code + * + * (C) Copyright 2013 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include + +#include +#include + +#include "intel_mid_weak_decls.h" + +static unsigned long __init tangier_calibrate_tsc(void) +{ + unsigned long fast_calibrate; + u32 lo, hi, ratio, fsb, bus_freq; + + /* *********************** */ + /* Compute TSC:Ratio * FSB */ + /* *********************** */ + + /* Compute Ratio */ + rdmsr(MSR_PLATFORM_INFO, lo, hi); + pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); + + ratio = (lo >> 8) & 0xFF; + pr_debug("ratio is %d\n", ratio); + if (!ratio) { + pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); + ratio = 4; + } + + /* Compute FSB */ + rdmsr(MSR_FSB_FREQ, lo, hi); + pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", + hi, lo); + + bus_freq = lo & 0x7; + pr_debug("bus_freq = 0x%x\n", bus_freq); + + if (bus_freq == 0) + fsb = FSB_FREQ_100SKU; + else if (bus_freq == 1) + fsb = FSB_FREQ_100SKU; + else if (bus_freq == 2) + fsb = FSB_FREQ_133SKU; + else if (bus_freq == 3) + fsb = FSB_FREQ_167SKU; + else if (bus_freq == 4) + fsb = FSB_FREQ_83SKU; + else if (bus_freq == 5) + fsb = FSB_FREQ_400SKU; + else if (bus_freq == 6) + fsb = FSB_FREQ_267SKU; + else if (bus_freq == 7) + fsb = FSB_FREQ_333SKU; + else { + BUG(); + pr_err("Invalid bus_freq! 
Setting to minimal value!\n"); + fsb = FSB_FREQ_100SKU; + } + + /* TSC = FSB Freq * Resolved HFM Ratio */ + fast_calibrate = ratio * fsb; + pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); + + /* ************************************ */ + /* Calculate Local APIC Timer Frequency */ + /* ************************************ */ + lapic_timer_frequency = (fsb * 1000) / HZ; + + pr_debug("Setting lapic_timer_frequency = %d\n", + lapic_timer_frequency); + + /* mark tsc clocksource as reliable */ + set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); + + return fast_calibrate; +} + +static void __init tangier_arch_setup(void) +{ + x86_platform.calibrate_tsc = tangier_calibrate_tsc; +} + +/* tangier arch ops */ +static struct intel_mid_ops tangier_ops = { + .arch_setup = tangier_arch_setup, +}; + +void *get_tangier_ops(void) +{ + return &tangier_ops; +} -- cgit v1.2.3 From 8709ed4d4b0eab04561c1ec9e6ea50fd1e3897ff Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 17 Jun 2016 17:15:03 -0700 Subject: x86/cpu: Fix duplicated X86_BUG(9) macro cpufeatures.h currently defines X86_BUG(9) twice on 32-bit: #define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ ... #ifdef CONFIG_X86_32 #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif I think what happened was that this added the X86_BUG_ESPFIX, but in an #ifdef below most of the bugs: 58a5aac53313 x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled Then this came along and added X86_BUG_NULL_SEG, but collided with the earlier one that did the bug below the main block defining all the X86_BUG()s. 7a5d67048745 x86/cpu: Probe the behavior of nulling out a segment at boot time Signed-off-by: Dave Hansen Acked-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160618001503.CEE1B141@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4a413485f9eb..c64b1e9c5d1a 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -301,10 +301,6 @@ #define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ #define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ -#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */ - - #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional @@ -312,5 +308,7 @@ */ #define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #endif /* _ASM_X86_CPUFEATURES_H */ -- cgit v1.2.3 From a11836fa5a67ba56d8338138e37b42384af73e5e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 9 Jul 2016 16:45:29 +0300 Subject: x86/platform/intel-mid: Mark regulators explicitly defined Intel MID platforms are using explicitly defined regulators. 
Let the regulator core know that we do not have any additional regulators left. This lets it substitute unprovided regulators with dummy ones. Without this change when CONFIG_REGULATOR=y the USB driver fails on getting "vbus" regulator and SDHCI can't get "vmmc" and "vqmmc" regulators either. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468071929-77383-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/intel-mid.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 90bb997ed0a2..abbf49c6e9d3 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -144,6 +145,15 @@ static void intel_mid_arch_setup(void) out: if (intel_mid_ops->arch_setup) intel_mid_ops->arch_setup(); + + /* + * Intel MID platforms are using explicitly defined regulators. + * + * Let the regulator core know that we do not have any additional + * regulators left. This lets it substitute unprovided regulators with + * dummy ones: + */ + regulator_has_full_constraints(); } /* MID systems don't have i8042 controller */ -- cgit v1.2.3 From 2e9d1e150abf88cb63e5d34ca286edbb95b4c53d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jun 2016 16:58:29 +0200 Subject: x86/entry: Avoid interrupt flag save and restore Thanks to all the work that was done by Andy Lutomirski and others, enter_from_user_mode() and prepare_exit_to_usermode() are now called only with interrupts disabled. Let's provide them a version of user_enter()/user_exit() that skips saving and restoring the interrupt flag. On an AMD-based machine I tested this patch on, with force-enabled context tracking, the speed-up in system calls was 90 clock cycles or 6%, measured with the following simple benchmark: #include #include #include #include unsigned long rdtsc() { unsigned long result; asm volatile("rdtsc; shl $32, %%rdx; mov %%eax, %%eax\n" "or %%rdx, %%rax" : "=a" (result) : : "rdx"); return result; } int main() { unsigned long tsc1, tsc2; int pid = getpid(); int i; tsc1 = rdtsc(); for (i = 0; i < 100000000; i++) kill(pid, SIGWINCH); tsc2 = rdtsc(); printf("%ld\n", tsc2 - tsc1); } Signed-off-by: Paolo Bonzini Reviewed-by: Rik van Riel Reviewed-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1466434712-31440-2-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 4 ++-- include/linux/context_tracking.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec138e538c44..618bc61d35b7 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -43,7 +43,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) __visible void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit(); + user_exit_irqoff(); } #else static inline void enter_from_user_mode(void) {} @@ -274,7 +274,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) ti->status &= ~TS_COMPAT; #endif - user_enter(); + user_enter_irqoff(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d259274238db..d9aef2a0ec8e 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -31,6 +31,19 @@ static inline void user_exit(void) context_tracking_exit(CONTEXT_USER); } +/* Called with interrupts disabled. */ +static inline void user_enter_irqoff(void) +{ + if (context_tracking_is_enabled()) + __context_tracking_enter(CONTEXT_USER); + +} +static inline void user_exit_irqoff(void) +{ + if (context_tracking_is_enabled()) + __context_tracking_exit(CONTEXT_USER); +} + static inline enum ctx_state exception_enter(void) { enum ctx_state prev_ctx; @@ -69,6 +82,8 @@ static inline enum ctx_state ct_state(void) #else static inline void user_enter(void) { } static inline void user_exit(void) { } +static inline void user_enter_irqoff(void) { } +static inline void user_exit_irqoff(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } -- cgit v1.2.3 From be8a18e2e98e04a5def5887d913b267865562448 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 20 Jun 2016 16:58:30 +0200 Subject: x86/entry: Inline enter_from_user_mode() This matches what is already done for prepare_exit_to_usermode(), and saves about 60 clock cycles (4% speedup) with the benchmark in the previous commit message. Signed-off-by: Paolo Bonzini Reviewed-by: Rik van Riel Reviewed-by: Andy Lutomirski Acked-by: Paolo Bonzini Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1466434712-31440-3-git-send-email-pbonzini@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 618bc61d35b7..9e1e27d31c6d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -40,7 +40,7 @@ static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. 
*/ -__visible void enter_from_user_mode(void) +__visible inline void enter_from_user_mode(void) { CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); -- cgit v1.2.3 From fc5f3ac24720012909c224a63ca3217f4759967d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:43 -0400 Subject: Revert "x86/tsc: Add missing Cherrytrail frequency to the table" This reverts commit: e2724e9d9692 ("x86/tsc: Add missing Cherrytrail frequency to the table") ... as it is incomplete, and is replaced by a more complete patch later in this series. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2199d0e959f7f71a18827268b5d060f8d3831639.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 9911a0620f9a..6aa0f4d9eea6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -23,7 +23,6 @@ #include /* CPU reference clock frequency: in KHz */ -#define FREQ_80 80000 #define FREQ_83 83200 #define FREQ_100 99840 #define FREQ_133 133200 @@ -57,8 +56,6 @@ static struct freq_desc freq_desc_tables[] = { { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, /* ANN */ { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, - /* AIRMONT */ - { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From ba8268330dc18d309a39175ea4d2c5d86c2cef09 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:44 -0400 Subject: x86/tsc_msr: Identify Intel-specific code try_msr_calibrate_tsc() is currently Intel-specific, and should not execute on any other vendor's parts. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1fe23c052826bdcfeb3d45045aa02246078cb5a7.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 6aa0f4d9eea6..4ec5e560ed73 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -86,6 +86,9 @@ unsigned long try_msr_calibrate_tsc(void) unsigned long res; int cpu_index; + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); if (cpu_index < 0) return 0; -- cgit v1.2.3 From 14bb4e34860af48ef1ea0f52b11611ce4db987fe Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:45 -0400 Subject: x86/tsc_msr: Remove debugging messages Debugging messages are not necessary after all of the possible hardware failures that never occur. Instead, this code can simply return 0. This code also doesn't need to print in the success case. tsc_init() already prints the TSC frequency, and apic=debug is available if anybody really is interested in printing the LAPIC frequency. 
Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/cf03279a125b95dfa9b8d3d5b4a66de09cd04050.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4ec5e560ed73..f7ba44b89cc4 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -76,9 +76,10 @@ static int match_cpu(u8 family, u8 model) (freq_desc_tables[cpu_index].freqs[freq_id]) /* - * Do MSR calibration only for known/supported CPUs. + * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Returns the calibration value or 0 if MSR calibration failed. + * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Return processor base frequency in KHz, or 0 on failure. */ unsigned long try_msr_calibrate_tsc(void) { @@ -100,31 +101,17 @@ unsigned long try_msr_calibrate_tsc(void) rdmsr(MSR_IA32_PERF_STATUS, lo, hi); ratio = (hi >> 8) & 0x1f; } - pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); - - if (!ratio) - goto fail; /* Get FSB FREQ ID */ rdmsr(MSR_FSB_FREQ, lo, hi); freq_id = lo & 0x7; freq = id_to_freq(cpu_index, freq_id); - pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", - freq_id, freq); - if (!freq) - goto fail; /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ res = freq * ratio; - pr_info("TSC runs at %lu KHz\n", res); #ifdef CONFIG_X86_LOCAL_APIC lapic_timer_frequency = (freq * 1000) / HZ; - pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); #endif return res; - -fail: - pr_warn("Fast TSC calibration using MSR failed\n"); - return 0; } -- cgit v1.2.3 From 9e0cae9f6227f946fb0076b6a68c88156137f618 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:46 -0400 Subject: x86/tsc_msr: Update comments, expand definitions Syntax only, no functional change. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/8653a2dba21fef122fc7b29eafb750e2004d3976.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index f7ba44b89cc4..4110f723fd0f 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -1,14 +1,5 @@ /* - * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms. - * - * TSC in Intel Atom SoC runs at a constant rate which can be figured - * by this formula: - * * - * See Intel 64 and IA-32 System Programming Guid section 16.12 and 30.11.5 - * for details. - * Especially some Intel Atom SoCs don't have PIT(i8254) or HPET, so MSR - * based calibration is the only option. 
- * + * tsc_msr.c - TSC frequency enumeration via MSR * * Copyright (C) 2013 Intel Corporation * Author: Bin Gao @@ -22,17 +13,10 @@ #include #include -/* CPU reference clock frequency: in KHz */ -#define FREQ_83 83200 -#define FREQ_100 99840 -#define FREQ_133 133200 -#define FREQ_166 166400 - #define MAX_NUM_FREQS 8 /* - * According to Intel 64 and IA-32 System Programming Guide, - * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be + * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. * Unfortunately some Intel Atom SoCs aren't quite compliant to this, * so we need manually differentiate SoC families. This is what the @@ -47,15 +31,15 @@ struct freq_desc { static struct freq_desc freq_desc_tables[] = { /* PNW */ - { 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, + { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, /* CLV+ */ - { 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, - /* TNG */ - { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, - /* VLV2 */ - { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, - /* ANN */ - { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, + { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, + /* TNG - Intel Atom processor Z3400 series */ + { 6, 0x4a, 1, { 0, 99840, 133200, 0, 0, 0, 0, 0 } }, + /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ + { 6, 0x37, 1, { 83200, 99840, 133200, 166400, 0, 0, 0, 0 } }, + /* ANN - Intel Atom processor Z3500 series */ + { 6, 0x5a, 1, { 83200, 99840, 133200, 99840, 0, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From 05680e7fa8a4e700e031a5e72cd8c18265f0031a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:47 -0400 Subject: x86/tsc_msr: Correct Silvermont reference clock values Atom processors use a 19.2 MHz crystal oscillator. Early processors generate 100 MHz via 19.2 MHz * 26 / 5 = 99.84 MHz. Later preocessor generate 100 MHz via 19.2 MHz * 125 / 24 = 100 MHz. Update the Silvermont-based tables accordingly, matching the Software Developers Manual. Also, correct a 166 MHz entry that should have been 116 MHz, and add a missing 80 MHz entry. 
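For the record, the crystal arithmetic above is easy to verify with a throwaway user-space snippet; the 19.2 MHz crystal and the multiplier/divider pairs are the ones quoted in this commit message, nothing more is implied:

#include <stdio.h>

int main(void)
{
	const unsigned int xtal_khz = 19200;	/* 19.2 MHz crystal */

	/* Early parts: 19.2 MHz * 26 / 5 = 99.84 MHz */
	printf("early 100 MHz SKU: %u kHz\n", xtal_khz * 26 / 5);

	/* Later parts: 19.2 MHz * 125 / 24 = 100 MHz exactly */
	printf("later 100 MHz SKU: %u kHz\n", xtal_khz * 125 / 24);

	return 0;
}

which is exactly the 99840 kHz -> 100000 kHz change made to the Silvermont rows below.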
Reported-by: Stephane Gasparini Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5d7561655dfb066ff10801b423405bae4d1cfbe2.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4110f723fd0f..20487e2382c6 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -35,11 +35,11 @@ static struct freq_desc freq_desc_tables[] = { /* CLV+ */ { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, /* TNG - Intel Atom processor Z3400 series */ - { 6, 0x4a, 1, { 0, 99840, 133200, 0, 0, 0, 0, 0 } }, + { 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ - { 6, 0x37, 1, { 83200, 99840, 133200, 166400, 0, 0, 0, 0 } }, + { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, /* ANN - Intel Atom processor Z3500 series */ - { 6, 0x5a, 1, { 83200, 99840, 133200, 99840, 0, 0, 0, 0 } }, + { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From 6fcb41cdaee5056c96de88ee095bddd27a7697de Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:48 -0400 Subject: x86/tsc_msr: Add Airmont reference clock values per the Intel 64 and IA-32 Architecture Software Developer's Manual... Add the reference clock for Intel Atom Processors Based on the Airmont Microarchitecture. Reported-by: Stephane Gasparini Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/abc6a0f4b18281410da1a3f26e2819d8e03e144f.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc_msr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 20487e2382c6..65b3d8cb8325 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -13,7 +13,7 @@ #include #include -#define MAX_NUM_FREQS 8 +#define MAX_NUM_FREQS 9 /* * If MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be @@ -40,6 +40,9 @@ static struct freq_desc freq_desc_tables[] = { { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, /* ANN - Intel Atom processor Z3500 series */ { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, + /* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */ + { 6, 0x4c, 1, { 83300, 100000, 133300, 116700, + 80000, 93300, 90000, 88900, 87500 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v1.2.3 From 03482e08a87d24e5c8c23e6981c482e832cf3bdc Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:15 -0700 Subject: x86/fpu/xstate: Align xstate components according to CPUID CPUID function 0x0d, sub function (i, i > 1) returns in ecx[1] the alignment requirement of component 'i' when the compacted format is used. If ecx[1] is 0, component 'i' is located immediately following the preceding component. If ecx[1] is 1, component 'i' is located on the next 64-byte boundary following the preceding component. Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/331e2bef1a0a7a584f06adde095b6bbfbe166472.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 60 +++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0b01f003df8b..7963029cb4ad 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -269,6 +269,33 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_PKRU); } +/* + * This check is important because it is easy to get XSTATE_* + * confused with XSTATE_BIT_*. + */ +#define CHECK_XFEATURE(nr) do { \ + WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ + WARN_ON(nr >= XFEATURE_MAX); \ +} while (0) + +/* + * We could cache this like xstate_size[], but we only use + * it here, so it would be a waste of space. + */ +static int xfeature_is_aligned(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + /* + * The value returned by ECX[1] indicates the alignment + * of state component 'i' when the compacted format + * of the extended region of an XSAVE area is used: + */ + return !!(ecx & 2); +} + /* * This function sets up offsets and sizes of all extended states in * xsave area. This supports both standard format and compacted format @@ -306,10 +333,14 @@ static void __init setup_xstate_comp(void) else xstate_comp_sizes[i] = 0; - if (i > FIRST_EXTENDED_XFEATURE) + if (i > FIRST_EXTENDED_XFEATURE) { xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + xstate_comp_sizes[i-1]; + if (xfeature_is_aligned(i)) + xstate_comp_offsets[i] = + ALIGN(xstate_comp_offsets[i], 64); + } } } @@ -366,33 +397,6 @@ static int xfeature_is_user(int xfeature_nr) } */ -/* - * This check is important because it is easy to get XSTATE_* - * confused with XSTATE_BIT_*. - */ -#define CHECK_XFEATURE(nr) do { \ - WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ - WARN_ON(nr >= XFEATURE_MAX); \ -} while (0) - -/* - * We could cache this like xstate_size[], but we only use - * it here, so it would be a waste of space. - */ -static int xfeature_is_aligned(int xfeature_nr) -{ - u32 eax, ebx, ecx, edx; - - CHECK_XFEATURE(xfeature_nr); - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); - /* - * The value returned by ECX[1] indicates the alignment - * of state component i when the compacted format - * of the extended region of an XSAVE area is used - */ - return !!(ecx & 2); -} - static int xfeature_uncompacted_offset(int xfeature_nr) { u32 eax, ebx, ecx, edx; -- cgit v1.2.3 From 1499ce2dd45afddea2e84f9f920890cf88384c4e Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:16 -0700 Subject: x86/fpu/xstate: Fix supervisor xstate component offset CPUID function 0x0d, sub function (i, i > 1) returns in ebx the offset of xstate component i. Zero is returned for a supervisor state. A supervisor state can only be saved by XSAVES and XSAVES uses a compacted format. There is no fixed offset for a supervisor state. This patch checks and makes sure a supervisor state offset is not recorded or mis-used. This has no effect in practice as we currently use no supervisor states, but it would be good to fix. 
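Both this change and the alignment patch before it read the same CPUID leaf. A small user-space sketch of how those sub-leaves decode may help when spot-checking a given machine; it assumes a GCC/clang toolchain providing <cpuid.h> and a CPU that implements leaf 0x0d, and the EAX/EBX/ECX interpretation is the one quoted from the SDM in these commit messages, so treat it as an illustration rather than the kernel's helper:

#include <stdio.h>
#include <cpuid.h>

/* Decode one CPUID(0x0d, nr) sub-leaf the way the xstate code reads it. */
static void dump_xfeature(unsigned int nr)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid_count(0x0d, nr, eax, ebx, ecx, edx);
	(void)edx;
	if (!eax)
		return;				/* component not reported */

	printf("xfeature %2u: size %u bytes\n", nr, eax);

	if (ecx & 1)				/* ECX[0]: supervisor state */
		printf("  supervisor state, EBX offset not usable\n");
	else
		printf("  user state, standard-format offset %u\n", ebx);

	/* ECX[1]: 64-byte aligned when the compacted (XSAVES) format is used */
	printf("  64-byte aligned in compacted format: %s\n",
	       (ecx & 2) ? "yes" : "no");
}

int main(void)
{
	unsigned int nr;

	for (nr = 2; nr < 10; nr++)		/* extended states only */
		dump_xfeature(nr);

	return 0;
}

Running it shows at a glance which components XSAVES would place on a 64-byte boundary, which is precisely what setup_xstate_comp() now accounts for.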
Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/81b29e40d35d4cec9f2511a856fe769f34935a3f.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 1 + arch/x86/include/asm/fpu/xstate.h | 3 ++ arch/x86/kernel/fpu/xstate.c | 62 ++++++++++++++++++++++++--------------- 3 files changed, 43 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 36b90bbfc69f..12dd648735b6 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -122,6 +122,7 @@ enum xfeature { #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) +#define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index d812cf361282..92f376ccc999 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -18,6 +18,9 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) +/* Supervisor features */ +#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) + /* Supported features which support lazy state saving */ #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ XFEATURE_MASK_SSE | \ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 7963029cb4ad..02786fb7a1e8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -112,6 +112,27 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) } EXPORT_SYMBOL_GPL(cpu_has_xfeatures); +static int xfeature_is_supervisor(int xfeature_nr) +{ + /* + * We currently do not support supervisor states, but if + * we did, we could find out like this. + * + * SDM says: If state component 'i' is a user state component, + * ECX[0] return 0; if state component i is a supervisor + * state component, ECX[0] returns 1. + */ + u32 eax, ebx, ecx, edx; + + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + return !!(ecx & 1); +} + +static int xfeature_is_user(int xfeature_nr) +{ + return !xfeature_is_supervisor(xfeature_nr); +} + /* * When executing XSAVEOPT (or other optimized XSAVE instructions), if * a processor implementation detects that an FPU state component is still @@ -230,7 +251,14 @@ static void __init setup_xstate_features(void) continue; cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_offsets[i] = ebx; + + /* + * If an xfeature is supervisor state, the offset + * in EBX is invalid. We leave it to -1. + */ + if (xfeature_is_user(i)) + xstate_offsets[i] = ebx; + xstate_sizes[i] = eax; /* * In our xstate size checks, we assume that the @@ -375,32 +403,20 @@ static void __init setup_init_fpu_buf(void) copy_xregs_to_kernel_booting(&init_fpstate.xsave); } -static int xfeature_is_supervisor(int xfeature_nr) -{ - /* - * We currently do not support supervisor states, but if - * we did, we could find out like this. 
- * - * SDM says: If state component i is a user state component, - * ECX[0] return 0; if state component i is a supervisor - * state component, ECX[0] returns 1. - u32 eax, ebx, ecx, edx; - cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx; - return !!(ecx & 1); - */ - return 0; -} -/* -static int xfeature_is_user(int xfeature_nr) -{ - return !xfeature_is_supervisor(xfeature_nr); -} -*/ - static int xfeature_uncompacted_offset(int xfeature_nr) { u32 eax, ebx, ecx, edx; + /* + * Only XSAVES supports supervisor states and it uses compacted + * format. Checking a supervisor state's uncompacted offset is + * an error. + */ + if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) { + WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); + return -1; + } + CHECK_XFEATURE(xfeature_nr); cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); return ebx; -- cgit v1.2.3 From 91c3dba7dbc199191272f4a9863f86ea3bfd679f Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:17 -0700 Subject: x86/fpu/xstate: Fix PTRACE frames for XSAVES XSAVES uses compacted format and is a kernel instruction. The kernel should use standard-format, non-supervisor state data for PTRACE. Signed-off-by: Yu-cheng Yu [ Edited away artificial linebreaks. ] Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/de3d80949001305fe389799973b675cab055c457.1466179491.git.yu-cheng.yu@intel.com [ Made various readability edits. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/xstate.h | 5 +- arch/x86/kernel/fpu/regset.c | 52 +++++++---- arch/x86/kernel/fpu/xstate.c | 183 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 216 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 92f376ccc999..ae55a43e09c0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -51,5 +51,8 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); - +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave); +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave); #endif diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 81422dfb152b..c114b132d121 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, @@ -85,21 +86,26 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_read(fpu); - xsave = &fpu->state.xsave; - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. 
- */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_read(fpu); + + if (using_compacted_format()) { + ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); + } else { + fpstate_sanitize_xstate(fpu); + /* + * Copy the 48 bytes defined by the software into the xsave + * area in the thread struct, so that we can copy the whole + * area to user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + + /* + * Copy the xstate memory layout. + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + } return ret; } @@ -114,11 +120,27 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_XSAVE)) return -ENODEV; - fpu__activate_fpstate_write(fpu); + /* + * A whole standard-format XSAVE buffer is needed: + */ + if ((pos != 0) || (count < fpu_user_xstate_size)) + return -EFAULT; xsave = &fpu->state.xsave; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + fpu__activate_fpstate_write(fpu); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + ret = copyin_to_xsaves(kbuf, ubuf, xsave); + else + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + + /* + * In case of failure, mark all states as init: + */ + if (ret) + fpstate_init(&fpu->state); + /* * mxcsr reserved bits must be masked to zero for security reasons. */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 02786fb7a1e8..56c0e707af21 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -697,7 +698,12 @@ void __init fpu__init_system_xstate(void) return; } - update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask); + /* + * Update info used for ptrace frames; use standard-format size and no + * supervisor xstates: + */ + update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); + fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); @@ -925,16 +931,16 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; - /* Set the bits we need in PKRU */ + /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; if (init_val & PKEY_DISABLE_WRITE) new_pkru_bits |= PKRU_WD_BIT; - /* Shift the bits in to the correct place in PKRU for pkey. 
*/ + /* Shift the bits in to the correct place in PKRU for pkey: */ new_pkru_bits <<= pkey_shift; - /* Locate old copy of the state in the xsave buffer */ + /* Locate old copy of the state in the xsave buffer: */ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); /* @@ -947,9 +953,10 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, else new_pkru_state.pkru = old_pkru_state->pkru; - /* mask off any old bits in place */ + /* Mask off any old bits in place: */ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); - /* Set the newly-requested bits */ + + /* Set the newly-requested bits: */ new_pkru_state.pkru |= new_pkru_bits; /* @@ -963,8 +970,168 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, */ new_pkru_state.pad = 0; - fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, - sizeof(new_pkru_state)); + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state)); + + return 0; +} + +/* + * This is similar to user_regset_copyout(), but will not add offset to + * the source data pointer or increment pos, count, kbuf, and ubuf. + */ +static inline int xstate_copyout(unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf, + const void *data, const int start_pos, + const int end_pos) +{ + if ((count == 0) || (pos < start_pos)) + return 0; + + if (end_pos < 0 || pos < end_pos) { + unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); + + if (kbuf) { + memcpy(kbuf + pos, data, copy); + } else { + if (__copy_to_user(ubuf + pos, data, copy)) + return -EFAULT; + } + } + return 0; +} + +/* + * Convert from kernel XSAVES compacted format to standard format and copy + * to a ptrace buffer. It supports partial copy but pos always starts from + * zero. This is called from xstateregs_get() and there we check the CPU + * has XSAVES. + */ +int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, + void __user *ubuf, struct xregs_state *xsave) +{ + unsigned int offset, size; + int ret, i; + struct xstate_header header; + + /* + * Currently copy_regset_to_user() starts from pos 0: + */ + if (unlikely(pos != 0)) + return -EFAULT; + + /* + * The destination is a ptrace buffer; we put in only user xstates: + */ + memset(&header, 0, sizeof(header)); + header.xfeatures = xsave->header.xfeatures; + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + + /* + * Copy xregs_state->header: + */ + offset = offsetof(struct xregs_state, header); + size = sizeof(header); + + ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); + + if (ret) + return ret; + + for (i = 0; i < XFEATURE_MAX; i++) { + /* + * Copy only in-use xstates: + */ + if ((header.xfeatures >> i) & 1) { + void *src = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); + + if (ret) + return ret; + + if (offset + size >= count) + break; + } + + } + + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + offset = offsetof(struct fxregs_state, sw_reserved); + size = sizeof(xstate_fx_sw_bytes); + + ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); + + if (ret) + return ret; + + return 0; +} + +/* + * Convert from a ptrace standard-format buffer to kernel XSAVES format + * and copy to the target thread. This is called from xstateregs_set() and + * there we check the CPU has XSAVES and a whole standard-sized buffer + * exists. 
+ */ +int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, + struct xregs_state *xsave) +{ + unsigned int offset, size; + int i; + u64 xfeatures; + u64 allowed_features; + + offset = offsetof(struct xregs_state, header); + size = sizeof(xfeatures); + + if (kbuf) { + memcpy(&xfeatures, kbuf + offset, size); + } else { + if (__copy_from_user(&xfeatures, ubuf + offset, size)) + return -EFAULT; + } + + /* + * Reject if the user sets any disabled or supervisor features: + */ + allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; + + if (xfeatures & ~allowed_features) + return -EINVAL; + + for (i = 0; i < XFEATURE_MAX; i++) { + u64 mask = ((u64)1 << i); + + if (xfeatures & mask) { + void *dst = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + if (kbuf) { + memcpy(dst, kbuf + offset, size); + } else { + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; + } + } + } + + /* + * The state that came in from userspace was user-state only. + * Mask all the user states out of 'xfeatures': + */ + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + + /* + * Add back in the features that came in from userspace: + */ + xsave->header.xfeatures |= xfeatures; return 0; } -- cgit v1.2.3 From 996952e0148026ac0e512db5cad26e14f4267e8b Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:18 -0700 Subject: x86/fpu/xstate: Fix XSTATE component offset print out Component offset print out was incorrect for XSAVES. Correct it and move to a separate function. Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. 
Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/86602a8ac400626c6eca7125c3e15934866fc38e.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 56c0e707af21..09bac979b8a2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -269,8 +269,6 @@ static void __init setup_xstate_features(void) WARN_ONCE(last_good_offset > xstate_offsets[i], "x86/fpu: misordered xstate at %d\n", last_good_offset); last_good_offset = xstate_offsets[i]; - - printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); } } @@ -373,6 +371,21 @@ static void __init setup_xstate_comp(void) } } +/* + * Print out xstate component offsets and sizes + */ +static void __init print_xstate_offset_size(void) +{ + int i; + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i)) + continue; + pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", + i, xstate_comp_offsets[i], i, xstate_sizes[i]); + } +} + /* * setup the xstate image representing the init state */ @@ -707,6 +720,7 @@ void __init fpu__init_system_xstate(void) fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, -- cgit v1.2.3 From ac73b27aea4eacdd7555f664d5fc6e1d4d1c8bf6 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Fri, 17 Jun 2016 13:07:19 -0700 Subject: x86/fpu/xstate: Fix xstate_offsets, xstate_sizes for non-extended xstates The arrays xstate_offsets[] and xstate_sizes[] record XSAVE standard- format offsets and sizes. Values for non-extended state components fpu and xmm's were not initialized or used. Ptrace format conversion needs them. Fix it. Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/cf3ea36cf30e2a99e37da6483e65446d018ff0a7.1466179491.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 09bac979b8a2..f8d1aff10f69 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -246,6 +246,15 @@ static void __init setup_xstate_features(void) /* start at the beginnning of the "extended state" */ unsigned int last_good_offset = offsetof(struct xregs_state, extended_state_area); + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. 
+ */ + xstate_offsets[0] = 0; + xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); + xstate_offsets[1] = xstate_sizes[0]; + xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space); for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { if (!xfeature_enabled(i)) -- cgit v1.2.3 From 4ff5308744f5858e4e49e56a0445e2f8b73e47e0 Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Wed, 15 Jun 2016 12:05:45 -0700 Subject: x86/mm: Do not reference phys addr beyond kernel The new physical address randomized KASLR implementation can cause the kernel to be aligned close to the end of physical memory. In this case, _brk_end aligned to PMD will go beyond what is expected safe and hit the assert in __phys_addr_symbol(): VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); Instead, perform an inclusive range check to avoid incorrectly triggering the assert: kernel BUG at arch/x86/mm/physaddr.c:38! invalid opcode: 0000 [#1] SMP ... RIP: 0010:[] __phys_addr_symbol+0x41/0x50 ... Call Trace: [] cpa_process_alias+0xa9/0x210 [] ? do_raw_spin_unlock+0xc1/0x100 [] __change_page_attr_set_clr+0x8cf/0xbd0 [] ? vm_unmap_aliases+0x7d/0x210 [] change_page_attr_set_clr+0x18c/0x4e0 [] set_memory_4k+0x2c/0x40 [] check_bugs+0x28/0x2a [] start_kernel+0x49d/0x4b9 [] ? early_idt_handler_array+0x120/0x120 [] x86_64_start_reservations+0x29/0x2b [] x86_64_start_kernel+0x143/0x152 Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Chris Wilson Cc: Christian Borntraeger Cc: Denys Vlasenko Cc: Dexuan Cui Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Sai Praneeth Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/20160615190545.GA26071@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7a1f7bbf4105..379b5111ac6b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -101,7 +101,8 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + /* Do not reference physical address outside the kernel. */ + return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT; } #endif @@ -112,6 +113,12 @@ within(unsigned long addr, unsigned long start, unsigned long end) return addr >= start && addr < end; } +static inline int +within_inclusive(unsigned long addr, unsigned long start, unsigned long end) +{ + return addr >= start && addr <= end; +} + /* * Flushing functions */ @@ -1316,7 +1323,8 @@ static int cpa_process_alias(struct cpa_data *cpa) * to touch the high mapped kernel as well: */ if (!within(vaddr, (unsigned long)_text, _brk_end) && - within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { + within_inclusive(cpa->pfn, highmap_start_pfn(), + highmap_end_pfn())) { unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; alias_cpa = *cpa; -- cgit v1.2.3 From 06a3fcc44d98d6b05afeeae2fbb32938dc3233c7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Jun 2016 15:04:20 +0300 Subject: x86/platform/intel-mid: Make vertical indentation consistent The vertical indentation is kinda chaotic in intel-mid.h. Let's be consistent with it. 
Suggested-by: Ingo Molnar Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1465992260-29897-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 65 +++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 38498a4fb44f..59013a2ac713 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -42,11 +42,11 @@ struct devs_id { void *(*get_platform_data)(void *info); /* Custom handler for devices */ void (*device_handler)(struct sfi_device_table_entry *pentry, - struct devs_id *dev); + struct devs_id *dev); }; -#define sfi_device(i) \ - static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ +#define sfi_device(i) \ + static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i /* @@ -68,7 +68,7 @@ extern enum intel_mid_cpu_type __intel_mid_cpu_chip; /** * struct intel_mid_ops - Interface between intel-mid & sub archs * @arch_setup: arch_setup function to re-initialize platform - * structures (x86_init, x86_platform_init) + * structures (x86_init, x86_platform_init) * * This structure can be extended if any new interface is required * between intel-mid & its sub arch files. @@ -78,20 +78,20 @@ struct intel_mid_ops { }; /* Helper API's for INTEL_MID_OPS_INIT */ -#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ - [cpuid] = get_##cpuname##_ops +#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ + [cpuid] = get_##cpuname##_ops /* Maximum number of CPU ops */ -#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) +#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) /* * For every new cpu addition, a weak get__ops() function needs be * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. */ -#define INTEL_MID_OPS_INIT {\ - DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ - DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ - DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ +#define INTEL_MID_OPS_INIT { \ + DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ + DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ + DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ }; #ifdef CONFIG_X86_INTEL_MID @@ -108,8 +108,8 @@ static inline bool intel_mid_has_msic(void) #else /* !CONFIG_X86_INTEL_MID */ -#define intel_mid_identify_cpu() (0) -#define intel_mid_has_msic() (0) +#define intel_mid_identify_cpu() 0 +#define intel_mid_has_msic() 0 #endif /* !CONFIG_X86_INTEL_MID */ @@ -125,35 +125,38 @@ extern enum intel_mid_timer_options intel_mid_timer_options; * Penwell uses spread spectrum clock, so the freq number is not exactly * the same as reported by MSR based on SDM. 
*/ -#define FSB_FREQ_83SKU 83200 -#define FSB_FREQ_100SKU 99840 -#define FSB_FREQ_133SKU 133000 +#define FSB_FREQ_83SKU 83200 +#define FSB_FREQ_100SKU 99840 +#define FSB_FREQ_133SKU 133000 -#define FSB_FREQ_167SKU 167000 -#define FSB_FREQ_200SKU 200000 -#define FSB_FREQ_267SKU 267000 -#define FSB_FREQ_333SKU 333000 -#define FSB_FREQ_400SKU 400000 +#define FSB_FREQ_167SKU 167000 +#define FSB_FREQ_200SKU 200000 +#define FSB_FREQ_267SKU 267000 +#define FSB_FREQ_333SKU 333000 +#define FSB_FREQ_400SKU 400000 /* Bus Select SoC Fuse value */ -#define BSEL_SOC_FUSE_MASK 0x7 -#define BSEL_SOC_FUSE_001 0x1 /* FSB 133MHz */ -#define BSEL_SOC_FUSE_101 0x5 /* FSB 100MHz */ -#define BSEL_SOC_FUSE_111 0x7 /* FSB 83MHz */ +#define BSEL_SOC_FUSE_MASK 0x7 +/* FSB 133MHz */ +#define BSEL_SOC_FUSE_001 0x1 +/* FSB 100MHz */ +#define BSEL_SOC_FUSE_101 0x5 +/* FSB 83MHz */ +#define BSEL_SOC_FUSE_111 0x7 -#define SFI_MTMR_MAX_NUM 8 -#define SFI_MRTC_MAX 8 +#define SFI_MTMR_MAX_NUM 8 +#define SFI_MRTC_MAX 8 extern void intel_scu_devices_create(void); extern void intel_scu_devices_destroy(void); /* VRTC timer */ -#define MRST_VRTC_MAP_SZ (1024) -/*#define MRST_VRTC_PGOFFSET (0xc00) */ +#define MRST_VRTC_MAP_SZ 1024 +/* #define MRST_VRTC_PGOFFSET 0xc00 */ extern void intel_mid_rtc_init(void); -/* the offset for the mapping of global gpio pin to irq */ -#define INTEL_MID_IRQ_OFFSET 0x100 +/* The offset for the mapping of global gpio pin to irq */ +#define INTEL_MID_IRQ_OFFSET 0x100 #endif /* _ASM_X86_INTEL_MID_H */ -- cgit v1.2.3 From eb019503569c8c701f1e9c70e848d99c6680839b Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 10 Jul 2016 19:14:01 +0200 Subject: perf/x86: Fix bogus kernel printk, again This showed up as "6Failed to access..." here. Signed-off-by: Vegard Nossum Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Chen Yucong Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1b74dde7c47c ("x86/cpu: Convert printk(KERN_ ...) to pr_(...)") Link: http://lkml.kernel.org/r/1468170841-17045-1-git-send-email-vegard.nossum@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 26ced536005a..91eac39625be 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -263,7 +263,7 @@ static bool check_hw_exists(void) msr_fail: pr_cont("Broken PMU hardware detected, using software events only.\n"); - pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); -- cgit v1.2.3 From 447d29d1d3aed839e74c2401ef63387780ac51ed Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Jun 2016 12:31:53 +0200 Subject: x86/quirks: Apply nvidia_bugs quirk only on root bus Since the following commit: 8659c406ade3 ("x86: only scan the root bus in early PCI quirks") ... early quirks are only applied to devices on the root bus. The motivation was to prevent application of the nvidia_bugs quirk on secondary buses. We're about to reintroduce scanning of secondary buses for a quirk to reset the Broadcom 4331 wireless card on 2011/2012 Macs. To prevent regressions, open code the requirement to apply nvidia_bugs only on the root bus. Signed-off-by: Lukas Wunner Cc: Andy Lutomirski Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/4d5477c1d76b2f0387a780f2142bbcdd9fee869b.1465690253.git.lukas@wunner.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bca14c899137..256976fe2666 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -75,6 +75,13 @@ static void __init nvidia_bugs(int num, int slot, int func) { #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC + /* + * Only applies to Nvidia root ports (bus 0) and not to + * Nvidia graphics cards with PCI ports on secondary buses. + */ + if (num) + return; + /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. -- cgit v1.2.3 From 850c321027c2e31d0afc71588974719a4b565550 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Jun 2016 12:31:53 +0200 Subject: x86/quirks: Reintroduce scanning of secondary buses We used to scan secondary buses until the following commit that was applied in 2009: 8659c406ade3 ("x86: only scan the root bus in early PCI quirks") which commit constrained early quirks to the root bus only. Its motivation was to prevent application of the nvidia_bugs quirk on secondary buses. We're about to add a quirk to reset the Broadcom 4331 wireless card on 2011/2012 Macs, which is located on a secondary bus behind a PCIe root port. To facilitate that, reintroduce scanning of secondary buses. The commit message of 8659c406ade3 notes that scanning only the root bus "saves quite some unnecessary scanning work". The algorithm used prior to 8659c406ade3 was particularly time consuming because it scanned buses 0 to 31 brute force. To avoid lengthening boot time, employ a recursive strategy which only scans buses that are actually reachable from the root bus. Yinghai Lu pointed out that the secondary bus number read from a bridge's config space may be invalid, in particular a value of 0 would cause an infinite loop. The PCI core goes beyond that and recurses to a child bus only if its bus number is greater than the parent bus number (see pci_scan_bridge()). Since the root bus is numbered 0, this implies that secondary buses may not be 0. Do the same on early scanning. If this algorithm is found to significantly impact boot time or cause infinite loops on broken hardware, it would be possible to limit its recursion depth: The Broadcom 4331 quirk applies at depth 1, all others at depth 0, so the bus need not be scanned deeper than that for now. An alternative approach would be to revert to scanning only the root bus, and apply the Broadcom 4331 quirk to the root ports 8086:1c12, 8086:1e12 and 8086:1e16. Apple always positioned the card behind either of these three ports. The quirk would then check presence of the card in slot 0 below the root port and do its deed. Signed-off-by: Lukas Wunner Cc: Andy Lutomirski Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/f0daa70dac1a9b2483abdb31887173eb6ab77bdf.1465690253.git.lukas@wunner.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 256976fe2666..ea60c05c2487 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -610,12 +610,6 @@ struct chipset { void (*f)(int num, int slot, int func); }; -/* - * Only works for devices on the root bus. If you add any devices - * not on bus 0 readd another loop level in early_quirks(). But - * be careful because at least the Nvidia quirk here relies on - * only matching on bus 0. - */ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, nvidia_bugs }, @@ -648,6 +642,8 @@ static struct chipset early_qrk[] __initdata = { {} }; +static void __init early_pci_scan_bus(int bus); + /** * check_dev_quirk - apply early quirks to a given PCI device * @num: bus number @@ -656,7 +652,7 @@ static struct chipset early_qrk[] __initdata = { * * Check the vendor & device ID against the early quirks table. * - * If the device is single function, let early_quirks() know so we don't + * If the device is single function, let early_pci_scan_bus() know so we don't * poke at this device again. */ static int __init check_dev_quirk(int num, int slot, int func) @@ -665,6 +661,7 @@ static int __init check_dev_quirk(int num, int slot, int func) u16 vendor; u16 device; u8 type; + u8 sec; int i; class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); @@ -692,25 +689,36 @@ static int __init check_dev_quirk(int num, int slot, int func) type = read_pci_config_byte(num, slot, func, PCI_HEADER_TYPE); + + if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { + sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); + if (sec > num) + early_pci_scan_bus(sec); + } + if (!(type & 0x80)) return -1; return 0; } -void __init early_quirks(void) +static void __init early_pci_scan_bus(int bus) { int slot, func; - if (!early_pci_allowed()) - return; - /* Poor man's PCI discovery */ - /* Only scan the root bus */ for (slot = 0; slot < 32; slot++) for (func = 0; func < 8; func++) { /* Only probe function 0 on single fn devices */ - if (check_dev_quirk(0, slot, func)) + if (check_dev_quirk(bus, slot, func)) break; } } + +void __init early_quirks(void) +{ + if (!early_pci_allowed()) + return; + + early_pci_scan_bus(0); +} -- cgit v1.2.3 From abb2bafd295fe962bbadc329dbfb2146457283ac Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 12 Jun 2016 12:31:53 +0200 Subject: x86/quirks: Add early quirk to reset Apple AirPort card MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EFI firmware on Macs contains a full-fledged network stack for downloading OS X images from osrecovery.apple.com. Unfortunately on Macs introduced 2011 and 2012, EFI brings up the Broadcom 4331 wireless card on every boot and leaves it enabled even after ExitBootServices has been called. The card continues to assert its IRQ line, causing spurious interrupts if the IRQ is shared. It also corrupts memory by DMAing received packets, allowing for remote code execution over the air. 
This only stops when a driver is loaded for the wireless card, which may be never if the driver is not installed or blacklisted. The issue seems to be constrained to the Broadcom 4331. Chris Milsted has verified that the newer Broadcom 4360 built into the MacBookPro11,3 (2013/2014) does not exhibit this behaviour. The chances that Apple will ever supply a firmware fix for the older machines appear to be zero. The solution is to reset the card on boot by writing to a reset bit in its mmio space. This must be done as an early quirk and not as a plain vanilla PCI quirk to successfully combat memory corruption by DMAed packets: Matthew Garrett found out in 2012 that the packets are written to EfiBootServicesData memory (http://mjg59.dreamwidth.org/11235.html). This type of memory is made available to the page allocator by efi_free_boot_services(). Plain vanilla PCI quirks run much later, in subsys initcall level. In-between a time window would be open for memory corruption. Random crashes occurring in this time window and attributed to DMAed packets have indeed been observed in the wild by Chris Bainbridge. When Matthew Garrett analyzed the memory corruption issue in 2012, he sought to fix it with a grub quirk which transitions the card to D3hot: http://git.savannah.gnu.org/cgit/grub.git/commit/?id=9d34bb85da56 This approach does not help users with other bootloaders and while it may prevent DMAed packets, it does not cure the spurious interrupts emanating from the card. Unfortunately the card's mmio space is inaccessible in D3hot, so to reset it, we have to undo the effect of Matthew's grub patch and transition the card back to D0. Note that the quirk takes a few shortcuts to reduce the amount of code: The size of BAR 0 and the location of the PM capability is identical on all affected machines and therefore hardcoded. Only the address of BAR 0 differs between models. Also, it is assumed that the BCMA core currently mapped is the 802.11 core. The EFI driver seems to always take care of this. Michael Büsch, Bjorn Helgaas and Matt Fleming contributed feedback towards finding the best solution to this problem. 
The following should be a comprehensive list of affected models: iMac13,1 2012 21.5" [Root Port 00:1c.3 = 8086:1e16] iMac13,2 2012 27" [Root Port 00:1c.3 = 8086:1e16] Macmini5,1 2011 i5 2.3 GHz [Root Port 00:1c.1 = 8086:1c12] Macmini5,2 2011 i5 2.5 GHz [Root Port 00:1c.1 = 8086:1c12] Macmini5,3 2011 i7 2.0 GHz [Root Port 00:1c.1 = 8086:1c12] Macmini6,1 2012 i5 2.5 GHz [Root Port 00:1c.1 = 8086:1e12] Macmini6,2 2012 i7 2.3 GHz [Root Port 00:1c.1 = 8086:1e12] MacBookPro8,1 2011 13" [Root Port 00:1c.1 = 8086:1c12] MacBookPro8,2 2011 15" [Root Port 00:1c.1 = 8086:1c12] MacBookPro8,3 2011 17" [Root Port 00:1c.1 = 8086:1c12] MacBookPro9,1 2012 15" [Root Port 00:1c.1 = 8086:1e12] MacBookPro9,2 2012 13" [Root Port 00:1c.1 = 8086:1e12] MacBookPro10,1 2012 15" [Root Port 00:1c.1 = 8086:1e12] MacBookPro10,2 2012 13" [Root Port 00:1c.1 = 8086:1e12] For posterity, spurious interrupts caused by the Broadcom 4331 wireless card resulted in splats like this (stacktrace omitted): irq 17: nobody cared (try booting with the "irqpoll" option) handlers: [] pcie_isr [] sdhci_irq [sdhci] threaded [] sdhci_thread_irq [sdhci] [] azx_interrupt [snd_hda_codec] Disabling IRQ #17 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=79301 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111781 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=728916 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=895951#c16 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1009819 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1098621 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1149632#c5 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1279130 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1332732 Tested-by: Konstantin Simanov # [MacBookPro8,1] Tested-by: Lukas Wunner # [MacBookPro9,1] Tested-by: Bryan Paradis # [MacBookPro9,2] Tested-by: Andrew Worsley # [MacBookPro10,1] Tested-by: Chris Bainbridge # [MacBookPro10,2] Signed-off-by: Lukas Wunner Acked-by: Rafał Miłecki Acked-by: Matt Fleming Cc: Andy Lutomirski Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Chris Milsted Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Matthew Garrett Cc: Michael Buesch Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Cc: b43-dev@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: stable@vger.kernel.org Cc: stable@vger.kernel.org # 123456789abc: x86/quirks: Apply nvidia_bugs quirk only on root bus Cc: stable@vger.kernel.org # 123456789abc: x86/quirks: Reintroduce scanning of secondary buses Link: http://lkml.kernel.org/r/48d0972ac82a53d460e5fce77a07b2560db95203.1465690253.git.lukas@wunner.de [ Did minor readability edits. 
] Signed-off-by: Ingo Molnar --- arch/x86/kernel/early-quirks.c | 64 ++++++++++++++++++++++++++++++++++++++++++ drivers/bcma/bcma_private.h | 2 -- include/linux/bcma/bcma.h | 1 + 3 files changed, 65 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ea60c05c2487..57b71373bae3 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -11,7 +11,11 @@ #include #include +#include +#include #include +#include +#include #include #include #include @@ -21,6 +25,9 @@ #include #include #include +#include + +#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -597,6 +604,61 @@ static void __init force_disable_hpet(int num, int slot, int func) #endif } +#define BCM4331_MMIO_SIZE 16384 +#define BCM4331_PM_CAP 0x40 +#define bcma_aread32(reg) ioread32(mmio + 1 * BCMA_CORE_SIZE + reg) +#define bcma_awrite32(reg, val) iowrite32(val, mmio + 1 * BCMA_CORE_SIZE + reg) + +static void __init apple_airport_reset(int bus, int slot, int func) +{ + void __iomem *mmio; + u16 pmcsr; + u64 addr; + int i; + + if (!dmi_match(DMI_SYS_VENDOR, "Apple Inc.")) + return; + + /* Card may have been put into PCI_D3hot by grub quirk */ + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; + write_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL, pmcsr); + mdelay(10); + + pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); + if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { + dev_err("Cannot power up Apple AirPort card\n"); + return; + } + } + + addr = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0); + addr |= (u64)read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_1) << 32; + addr &= PCI_BASE_ADDRESS_MEM_MASK; + + mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); + if (!mmio) { + dev_err("Cannot iomap Apple AirPort card\n"); + return; + } + + pr_info("Resetting Apple AirPort card (left enabled by EFI)\n"); + + for (i = 0; bcma_aread32(BCMA_RESET_ST) && i < 30; i++) + udelay(10); + + bcma_awrite32(BCMA_RESET_CTL, BCMA_RESET_CTL_RESET); + bcma_aread32(BCMA_RESET_CTL); + udelay(1); + + bcma_awrite32(BCMA_RESET_CTL, 0); + bcma_aread32(BCMA_RESET_CTL); + udelay(10); + + early_iounmap(mmio, BCM4331_MMIO_SIZE); +} #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 @@ -639,6 +701,8 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, + { PCI_VENDOR_ID_BROADCOM, 0x4331, + PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} }; diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index eda09090cb52..f642c4264c27 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -8,8 +8,6 @@ #include #include -#define BCMA_CORE_SIZE 0x1000 - #define bcma_err(bus, fmt, ...) \ pr_err("bus%d: " fmt, (bus)->num, ##__VA_ARGS__) #define bcma_warn(bus, fmt, ...) 
\ diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index e6b41f42602b..3db25df396cb 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -159,6 +159,7 @@ struct bcma_host_ops { #define BCMA_CORE_DEFAULT 0xFFF #define BCMA_MAX_NR_CORES 16 +#define BCMA_CORE_SIZE 0x1000 /* Chip IDs of PCIe devices */ #define BCMA_CHIP_ID_BCM4313 0x4313 -- cgit v1.2.3 From 44530d588e142a96cf0cd345a7cb8911c4f88720 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 10 Jul 2016 20:58:36 +0200 Subject: Revert "perf/x86/intel, watchdog: Switch NMI watchdog to ref cycles on x86" This reverts commit 2c95afc1e83d93fac3be6923465e1753c2c53b0a. Stephane reported the following regression: > Since Andi added: > > commit 2c95afc1e83d93fac3be6923465e1753c2c53b0a > Author: Andi Kleen > Date: Thu Jun 9 06:14:38 2016 -0700 > > perf/x86/intel, watchdog: Switch NMI watchdog to ref cycles on x86 > > $ perf stat -e ref-cycles ls > .... > > fails systematically because the ref-cycles is now used by the > watchdog and given this is a system-wide pinned event, it monopolizes > the fixed counter 2 which is the only counter able to measure this event. Since the next merge window is near, fix the regression for now by reverting the commit. Reported-by: Stephane Eranian Cc: Andi Kleen Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Vince Weaver Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/hw_nmi.c | 8 -------- include/linux/nmi.h | 1 - kernel/watchdog.c | 7 ------- 3 files changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 016f4263fad4..7788ce643bf4 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -18,16 +18,8 @@ #include #include #include -#include #ifdef CONFIG_HARDLOCKUP_DETECTOR -int hw_nmi_get_event(void) -{ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - return PERF_COUNT_HW_REF_CPU_CYCLES; - return PERF_COUNT_HW_CPU_CYCLES; -} - u64 hw_nmi_get_sample_period(int watchdog_thresh) { return (u64)(cpu_khz) * 1000 * watchdog_thresh; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 79858af27209..4630eeae18e0 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -66,7 +66,6 @@ static inline bool trigger_allbutself_cpu_backtrace(void) #ifdef CONFIG_LOCKUP_DETECTOR u64 hw_nmi_get_sample_period(int watchdog_thresh); -int hw_nmi_get_event(void); extern int nmi_watchdog_enabled; extern int soft_watchdog_enabled; extern int watchdog_user_enabled; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8dd30fcd91be..9acb29f280ec 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -315,12 +315,6 @@ static int is_softlockup(unsigned long touch_ts) #ifdef CONFIG_HARDLOCKUP_DETECTOR -/* Can be overriden by architecture */ -__weak int hw_nmi_get_event(void) -{ - return PERF_COUNT_HW_CPU_CYCLES; -} - static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -610,7 +604,6 @@ static int watchdog_nmi_enable(unsigned int cpu) wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); - wd_attr->config = hw_nmi_get_event(); /* Try to register using hardware perf events */ event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); -- cgit v1.2.3 From 1fc2b67b43d5001b92b3a002b988884ad0137e99 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: 
Mon, 11 Jul 2016 09:18:54 -0700 Subject: x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES When the kernel is using XSAVES compacted format, we cannot do __copy_from_user() from a signal frame, which has standard-format data. Fix it by using copyin_to_xsaves(), which converts between formats and filters out all supervisor states that we do not allow userspace to write. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-2-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/signal.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 8aa96cbb5dfb..9e231d88bb33 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -323,8 +323,15 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpu__drop(fpu); - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { + if (using_compacted_format()) { + err = copyin_to_xsaves(NULL, buf_fx, + &fpu->state.xsave); + } else { + err = __copy_from_user(&fpu->state.xsave, + buf_fx, state_size); + } + + if (err || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); err = -1; -- cgit v1.2.3 From 5060b91513b866f774da15dfd82157864c4b1683 Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:55 -0700 Subject: x86/fpu/xstate: Return NULL for disabled xstate component address It is an error to request a disabled XSAVE/XSAVES component address. For that case, make __raw_xsave_addr() return a NULL and issue a warning. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-3-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f8d1aff10f69..4fb8dd7697c5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -760,6 +760,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) { int feature_nr = fls64(xstate_feature_mask) - 1; + if (!xfeature_enabled(feature_nr)) { + WARN_ON_FPU(1); + return NULL; + } + return (void *)xsave + xstate_comp_offsets[feature_nr]; } /* -- cgit v1.2.3 From 35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:56 -0700 Subject: x86/fpu/xstate: Fix fpstate_init() for XRSTORS In XSAVES mode if fpstate_init() is used to initialize a task's extended state area, xsave.header.xcomp_bv[63] must be set. Otherwise, when the task is scheduled, a warning is triggered from copy_kernel_to_xregs(). One such test case is: setting an invalid extended state through PTRACE. When xstateregs_set() rejects the syscall and re-initializes the task's extended state area. This triggers the warning mentioned above. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 6 ++++++ arch/x86/kernel/fpu/core.c | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 12dd648735b6..48df486b02f9 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -231,6 +231,12 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); +/* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c759bd01ec99..3fc03a09a93b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); + /* + * XRSTORS requires that this bit is set in xcomp_bv, or + * it will #GP. Make sure it is replaced after the memset(). + */ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else -- cgit v1.2.3 From b8be15d588060a03569ac85dc4a0247460988f5b Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:57 -0700 Subject: x86/fpu/xstate: Re-enable XSAVES We did not handle XSAVES instructions correctly. There were issues in converting between standard and compacted format when interfacing with user-space. These issues have been corrected. Add a WARN_ONCE() to make it clear that XSAVES supervisor states are not yet implemented. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-5-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 15 --------------- arch/x86/kernel/fpu/xstate.c | 9 +++++++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60f3839c5bfa..93982aebb398 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -230,21 +230,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) } fpu_user_xstate_size = fpu_kernel_xstate_size; - - /* - * Quirk: we don't yet handle the XSAVES* instructions - * correctly, as we don't correctly convert between - * standard and compacted format when interfacing - * with user-space - so disable it for now. - * - * The difference is small: with recent CPUs the - * compacted format is only marginally smaller than - * the standard FPU state format. - * - * ( This is easy to backport while we are fixing - * XSAVES* support. 
) - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4fb8dd7697c5..3169bcaf9391 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -221,6 +221,15 @@ void fpu__init_cpu_xstate(void) { if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) return; + /* + * Make it clear that XSAVES supervisor states are not yet + * implemented should anyone expect it to work by changing + * bits in XFEATURE_MASK_* macros and XCR0. + */ + WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), + "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + + xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); -- cgit v1.2.3 From 02c0cd2dcf7fdc47d054b855b148ea8b82dbb7eb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:50 -0400 Subject: x86/tsc_msr: Remove irqoff around MSR-based TSC enumeration Remove the irqoff/irqon around MSR-based TSC enumeration, as it is not necessary. Also rename: try_msr_calibrate_tsc() to cpu_khz_from_msr(), as that better describes what the routine does. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/a6b5c3ecd3b068175d2309599ab28163fc34215e.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 3 +-- arch/x86/kernel/tsc.c | 5 +---- arch/x86/kernel/tsc_msr.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 7428697c5b8d..db1f779a3766 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -52,7 +52,6 @@ extern int notsc_setup(char *); extern void tsc_save_sched_clock_state(void); extern void tsc_restore_sched_clock_state(void); -/* MSR based TSC calibration for Intel Atom SoC platforms */ -unsigned long try_msr_calibrate_tsc(void); +unsigned long cpu_khz_from_msr(void); #endif /* _ASM_X86_TSC_H */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 38ba6de56ede..35a3976c19cc 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -674,10 +674,7 @@ unsigned long native_calibrate_tsc(void) unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; - /* Calibrate TSC using MSR for Intel Atom SoCs */ - local_irq_save(flags); - fast_calibrate = try_msr_calibrate_tsc(); - local_irq_restore(flags); + fast_calibrate = cpu_khz_from_msr(); if (fast_calibrate) return fast_calibrate; diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 65b3d8cb8325..0fe720d64fef 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -68,7 +68,7 @@ static int match_cpu(u8 family, u8 model) * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy * Return processor base frequency in KHz, or 0 on failure. */ -unsigned long try_msr_calibrate_tsc(void) +unsigned long cpu_khz_from_msr(void) { u32 lo, hi, ratio, freq_id, freq; unsigned long res; -- cgit v1.2.3 From aa297292d708e89773b3b2cdcaf33f01bfa095d8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:51 -0400 Subject: x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID Skylake CPU base-frequency and TSC frequency may differ by up to 2%. Enumerate CPU and TSC frequencies separately, allowing cpu_khz and tsc_khz to differ. The existing CPU frequency calibration mechanism is unchanged. 
However, CPUID extensions are preferred, when available. CPUID.0x16 is preferred over MSR and timer calibration for CPU frequency discovery. CPUID.0x15 takes precedence over CPU-frequency for TSC frequency discovery. Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/b27ec289fd005833b27d694d9c2dbb716c5cdff7.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 1 + arch/x86/include/asm/x86_init.h | 4 ++- arch/x86/kernel/tsc.c | 75 +++++++++++++++++++++++++++++++++++++---- arch/x86/kernel/x86_init.c | 1 + 4 files changed, 73 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index db1f779a3766..a30591e1567c 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -36,6 +36,7 @@ extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern int check_tsc_disabled(void); +extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 4dcdf74dfed8..08a08a800e17 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -181,7 +181,8 @@ struct x86_legacy_features { /** * struct x86_platform_ops - platform specific runtime functions - * @calibrate_tsc: calibrate TSC + * @calibrate_cpu: calibrate CPU + * @calibrate_tsc: calibrate TSC, if different from CPU * @get_wallclock: get time from HW clock like RTC etc. * @set_wallclock: set time back to HW clock * @is_untracked_pat_range exclude from PAT logic @@ -200,6 +201,7 @@ struct x86_legacy_features { * semantics. */ struct x86_platform_ops { + unsigned long (*calibrate_cpu)(void); unsigned long (*calibrate_tsc)(void); void (*get_wallclock)(struct timespec *ts); int (*set_wallclock)(const struct timespec *ts); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 35a3976c19cc..e1496b79c28a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -239,7 +239,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) +static void set_cyc2ns_scale(unsigned long khz, int cpu) { unsigned long long tsc_now, ns_now; struct cyc2ns_data *data; @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_save(flags); sched_clock_idle_sleep_event(); - if (!cpu_khz) + if (!khz) goto done; data = cyc2ns_write_begin(cpu); @@ -261,7 +261,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, + clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* @@ -665,15 +665,72 @@ success: } /** - * native_calibrate_tsc - calibrate the tsc on boot + * native_calibrate_tsc + * Determine TSC frequency via CPUID, else return 0. 
*/ unsigned long native_calibrate_tsc(void) +{ + unsigned int eax_denominator, ebx_numerator, ecx_hz, edx; + unsigned int crystal_khz; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x15) + return 0; + + eax_denominator = ebx_numerator = ecx_hz = edx = 0; + + /* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */ + cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx); + + if (ebx_numerator == 0 || eax_denominator == 0) + return 0; + + crystal_khz = ecx_hz / 1000; + + if (crystal_khz == 0) { + switch (boot_cpu_data.x86_model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_khz = 24000; /* 24 MHz */ + } + } + + return crystal_khz * ebx_numerator / eax_denominator; +} + +static unsigned long cpu_khz_from_cpuid(void) +{ + unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (boot_cpu_data.cpuid_level < 0x16) + return 0; + + eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0; + + cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx); + + return eax_base_mhz * 1000; +} + +/** + * native_calibrate_cpu - calibrate the cpu on boot + */ +unsigned long native_calibrate_cpu(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; unsigned long flags, latch, ms, fast_calibrate; int hpet = is_hpet_enabled(), i, loopmin; + fast_calibrate = cpu_khz_from_cpuid(); + if (fast_calibrate) + return fast_calibrate; + fast_calibrate = cpu_khz_from_msr(); if (fast_calibrate) return fast_calibrate; @@ -834,8 +891,10 @@ int recalibrate_cpu_khz(void) if (!boot_cpu_has(X86_FEATURE_TSC)) return -ENODEV; + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + if (tsc_khz == 0) + tsc_khz = cpu_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -1241,8 +1300,10 @@ void __init tsc_init(void) return; } + cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); - cpu_khz = tsc_khz; + if (tsc_khz == 0) + tsc_khz = cpu_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); @@ -1262,7 +1323,7 @@ void __init tsc_init(void) */ for_each_possible_cpu(cpu) { cyc2ns_init(cpu); - set_cyc2ns_scale(cpu_khz, cpu); + set_cyc2ns_scale(tsc_khz, cpu); } if (tsc_disabled > 0) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index dad5fe9633a3..58b459296e13 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -92,6 +92,7 @@ static void default_nmi_init(void) { }; static int default_i8042_detect(void) { return 1; }; struct x86_platform_ops x86_platform = { + .calibrate_cpu = native_calibrate_cpu, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, -- cgit v1.2.3 From ff4c86635ee12461fd3bd911d7d5253394da8f9d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 17 Jun 2016 01:22:52 -0400 Subject: x86/tsc: Enumerate BXT tsc_khz via CPUID Hard code the BXT crystal clock (aka ART - Always Running Timer) to 19.200 MHz, and use CPUID leaf 0x15 to determine the BXT TSC frequency. Use tsc_khz to sanity check BXT cpu_khz, which can be erroneous in some configurations. (I simplified the original patch from Bin Gao.) 
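A rough standalone sketch of the CPUID.15H arithmetic this series relies on (user-space C for illustration only, not the kernel code; the SKL/BXT crystal fallback values simply mirror the table in the patches above):

#include <stdio.h>
#include <cpuid.h>

/* tsc_khz = crystal_khz * CPUID.15H:EBX / CPUID.15H:EAX, or 0 if unknown */
static unsigned long tsc_khz_from_cpuid_15h(unsigned int model)
{
	unsigned int eax_denominator = 0, ebx_numerator = 0, ecx_hz = 0, edx = 0;
	unsigned int crystal_khz;

	if (!__get_cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx))
		return 0;				/* leaf 0x15 not supported */
	if (!eax_denominator || !ebx_numerator)
		return 0;				/* ratio not enumerated */

	crystal_khz = ecx_hz / 1000;
	if (!crystal_khz) {
		switch (model) {			/* hard-coded fallbacks from the patches */
		case 0x4E:
		case 0x5E:
			crystal_khz = 24000;		/* SKL: 24.0 MHz */
			break;
		case 0x5C:
			crystal_khz = 19200;		/* BXT: 19.2 MHz */
			break;
		}
	}
	return (unsigned long)crystal_khz * ebx_numerator / eax_denominator;
}

int main(void)
{
	/* 0x5E (Skylake) is just an example model number supplied by the caller */
	printf("TSC ~ %lu kHz\n", tsc_khz_from_cpuid_15h(0x5E));
	return 0;
}
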
Original-From: Bin Gao Signed-off-by: Len Brown Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/bf4e7c175acd6d09719c47c319b10ff1f0627ff8.1466138954.git.len.brown@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e1496b79c28a..2a952fcb1516 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -693,7 +693,11 @@ unsigned long native_calibrate_tsc(void) switch (boot_cpu_data.x86_model) { case 0x4E: /* SKL */ case 0x5E: /* SKL */ - crystal_khz = 24000; /* 24 MHz */ + crystal_khz = 24000; /* 24.0 MHz */ + break; + case 0x5C: /* BXT */ + crystal_khz = 19200; /* 19.2 MHz */ + break; } } @@ -895,6 +899,8 @@ int recalibrate_cpu_khz(void) tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); @@ -1302,8 +1308,16 @@ void __init tsc_init(void) cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); + + /* + * Trust non-zero tsc_khz as authorative, + * and use it to sanity check cpu_khz, + * which will be off if system timer is off. + */ if (tsc_khz == 0) tsc_khz = cpu_khz; + else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) + cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); -- cgit v1.2.3 From f2d3adf46d5763e7154e303e972c891999a4da43 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Jul 2016 10:54:45 -0300 Subject: kvm arm/arm64: Remove trailing whitespace from headers Noticed while making a copy of these files to tools/ where those kernel files were being directly accessed, which we're not allowing anymore to avoid that changes in the kernel side break tooling. Cc: Christoffer Dall Cc: Eric Auger Cc: Marc Zyngier Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-82thftcdhj2j5wt6ir4vuyhk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/arm/include/uapi/asm/kvm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index df3f60cb1168..a2b3eb313a25 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -139,8 +139,8 @@ struct kvm_arch_memory_slot { #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) -#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) -- cgit v1.2.3 From 707a605b5a1732e548f4ff51ccf0199a14d95f0f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Jul 2016 14:04:22 +0300 Subject: x86/pci: Use MRFLD abbreviation for Merrifield Everywhere in the kernel the MRFLD is used as abbreviation of Intel Merrifield. Do the same in intel_mid_pci.c module. 
Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468321462-136016-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/pci/intel_mid_pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 5413d6a9817c..5a18aedcb341 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -36,8 +36,8 @@ #define PCIE_CAP_OFFSET 0x100 /* Quirks for the listed devices */ -#define PCI_DEVICE_ID_INTEL_MRFL_MMC 0x1190 -#define PCI_DEVICE_ID_INTEL_MRFL_HSU 0x1191 +#define PCI_DEVICE_ID_INTEL_MRFLD_MMC 0x1190 +#define PCI_DEVICE_ID_INTEL_MRFLD_HSU 0x1191 /* Fixed BAR fields */ #define PCIE_VNDR_CAP_ID_FIXED_BAR 0x00 /* Fixed BAR (TBD) */ @@ -229,7 +229,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) * Skip HS UART common registers device since it has * IRQ0 assigned and not used by the kernel. */ - if (dev->device == PCI_DEVICE_ID_INTEL_MRFL_HSU) + if (dev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU) return -EBUSY; /* * TNG has IRQ0 assigned to eMMC controller. But there @@ -238,7 +238,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) * eMMC gets it. The rest of devices still could be * enabled without interrupt line being allocated. */ - if (dev->device != PCI_DEVICE_ID_INTEL_MRFL_MMC) + if (dev->device != PCI_DEVICE_ID_INTEL_MRFLD_MMC) return 0; } break; -- cgit v1.2.3 From 05f310e26fe9d97fec0ce1752edc16bf1ea55a2d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Jul 2016 14:16:32 +0300 Subject: x86/sfi: Enable enumeration of SD devices SFI specification v0.8.2 defines type of devices which are connected to SD bus. In particularly WiFi dongle is a such. Add a callback to enumerate the devices connected to SD bus. Signed-off-by: Andy Shevchenko Cc: Bjorn Helgaas Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468322192-62080-1-git-send-email-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 15 +++++++++++++++ arch/x86/platform/intel-mid/sfi.c | 29 +++++++++++++++++++++++++++++ include/linux/sfi.h | 1 + 3 files changed, 45 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 59013a2ac713..9d6b097aa73d 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -49,6 +49,21 @@ struct devs_id { static const struct devs_id *const __intel_mid_sfi_##i##_dev __used \ __attribute__((__section__(".x86_intel_mid_dev.init"))) = &i +/** +* struct mid_sd_board_info - template for SD device creation +* @name: identifies the driver +* @bus_num: board-specific identifier for a given SD controller +* @max_clk: the maximum frequency device supports +* @platform_data: the particular data stored there is driver-specific +*/ +struct mid_sd_board_info { + char name[SFI_NAME_LEN]; + int bus_num; + unsigned short addr; + u32 max_clk; + void *platform_data; +}; + /* * Medfield is the follow-up of Moorestown, it combines two chip solution into * one. 
Other than that it also added always-on and constant tsc and lapic diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 5ee360a951ce..1555672d436f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -407,6 +407,32 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry, i2c_register_board_info(pentry->host_num, &i2c_info, 1); } +static void __init sfi_handle_sd_dev(struct sfi_device_table_entry *pentry, + struct devs_id *dev) +{ + struct mid_sd_board_info sd_info; + void *pdata; + + memset(&sd_info, 0, sizeof(sd_info)); + strncpy(sd_info.name, pentry->name, SFI_NAME_LEN); + sd_info.bus_num = pentry->host_num; + sd_info.max_clk = pentry->max_freq; + sd_info.addr = pentry->addr; + pr_debug("SD bus = %d, name = %16.16s, max_clk = %d, addr = 0x%x\n", + sd_info.bus_num, + sd_info.name, + sd_info.max_clk, + sd_info.addr); + pdata = intel_mid_sfi_get_pdata(dev, &sd_info); + if (IS_ERR(pdata)) + return; + + /* Nothing we can do with this for now */ + sd_info.platform_data = pdata; + + pr_debug("Successfully registered %16.16s", sd_info.name); +} + extern struct devs_id *const __x86_intel_mid_dev_start[], *const __x86_intel_mid_dev_end[]; @@ -490,6 +516,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) case SFI_DEV_TYPE_I2C: sfi_handle_i2c_dev(pentry, dev); break; + case SFI_DEV_TYPE_SD: + sfi_handle_sd_dev(pentry, dev); + break; case SFI_DEV_TYPE_UART: case SFI_DEV_TYPE_HSI: default: diff --git a/include/linux/sfi.h b/include/linux/sfi.h index d9b436f09925..e0e1597ef9e6 100644 --- a/include/linux/sfi.h +++ b/include/linux/sfi.h @@ -156,6 +156,7 @@ struct sfi_device_table_entry { #define SFI_DEV_TYPE_UART 2 #define SFI_DEV_TYPE_HSI 3 #define SFI_DEV_TYPE_IPC 4 +#define SFI_DEV_TYPE_SD 5 u8 host_num; /* attached to host 0, 1...*/ u16 addr; -- cgit v1.2.3 From 00839ee3b299303c6a5e26a0a2485427a3afcbbf Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:11 -0700 Subject: x86/mm: Move swap offset/type up in PTE to work around erratum This erratum can result in Accessed/Dirty getting set by the hardware when we do not expect them to be (on !Present PTEs). Instead of trying to fix them up after this happens, we just allow the bits to get set and try to ignore them. We do this by shifting the layout of the bits we use for swap offset/type in our 64-bit PTEs. It looks like this: bitnrs: | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| names: | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| before: | OFFSET (9-63) |0|X|X| TYPE(1-5) |0| after: | OFFSET (14-63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| Note that D was already a don't care (X) even before. We just move TYPE up and turn its old spot (which could be hit by the A bit) into all don't cares. We take 5 bits away from the offset, but that still leaves us with 50 bits which lets us index into a 62-bit swapfile (4 EiB). I think that's probably fine for the moment. We could theoretically reclaim 5 of the bits (1, 2, 3, 4, 7) but it doesn't gain us anything. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001911.9A3FD2B6@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 2ee781114d34..7e8ec7ae10fa 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -140,18 +140,32 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) ((void)(pte))/* NOP */ -/* Encode and de-code a swap entry */ +/* + * Encode and de-code a swap entry + * + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names + * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * + * G (8) is aliased and used as a PROT_NONE indicator for + * !present ptes. We need to start storing swap entries above + * there. We also need to avoid using A and D because of an + * erratum where they can be incorrectly set by hardware on + * non-present PTEs. + */ +#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 -#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +/* Place the offset above the type: */ +#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ +#define __swp_type(x) (((x).val >> (SWP_TYPE_FIRST_BIT)) \ & ((1U << SWP_TYPE_BITS) - 1)) -#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_FIRST_BIT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ - | ((offset) << SWP_OFFSET_SHIFT) }) + ((type) << (SWP_TYPE_FIRST_BIT)) \ + | ((offset) << SWP_OFFSET_FIRST_BIT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -- cgit v1.2.3 From 97e3c602ccbdd7db54e92fe05675c664c052a466 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:12 -0700 Subject: x86/mm: Ignore A/D bits in pte/pmd/pud_none() The erratum we are fixing here can lead to stray setting of the A and D bits. That means that a pte that we cleared might suddenly have A/D set. So, stop considering those bits when determining if a pte is pte_none(). The same goes for the other pmd_none() and pud_none(). pgd_none() can be skipped because it is not affected; we do not use PGD entries for anything other than pagetables on affected configurations. This adds a tiny amount of overhead to all pte_none() checks. I doubt we'll be able to measure it anywhere. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001912.5216F89C@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 13 ++++++++++--- arch/x86/include/asm/pgtable_types.h | 6 ++++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1a27396b6ea0..2815d268af8b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -480,7 +480,7 @@ pte_t *populate_extra_pte(unsigned long vaddr); static inline int pte_none(pte_t pte) { - return !pte.pte; + return !(pte.pte & ~(_PAGE_KNL_ERRATUM_MASK)); } #define __HAVE_ARCH_PTE_SAME @@ -552,7 +552,8 @@ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ - return (unsigned long)native_pmd_val(pmd) == 0; + unsigned long val = native_pmd_val(pmd); + return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) @@ -616,7 +617,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) #if CONFIG_PGTABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { - return native_pud_val(pud) == 0; + return (native_pud_val(pud) & ~(_PAGE_KNL_ERRATUM_MASK)) == 0; } static inline int pud_present(pud_t pud) @@ -694,6 +695,12 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + /* + * There is no need to do a workaround for the KNL stray + * A/D bit erratum here. PGDs only point to page tables + * except on 32-bit non-PAE which is not supported on + * KNL. + */ return !native_pgd_val(pgd); } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7b5efe264eff..d14d0a55322a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -70,6 +70,12 @@ _PAGE_PKEY_BIT2 | \ _PAGE_PKEY_BIT3) +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_KNL_ERRATUM_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) +#else +#define _PAGE_KNL_ERRATUM_MASK 0 +#endif + #ifdef CONFIG_KMEMCHECK #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #else -- cgit v1.2.3 From e4a84be6f05eab4778732d799f63b3cd15427885 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:14 -0700 Subject: x86/mm: Disallow running with 32-bit PTEs to work around erratum The Intel(R) Xeon Phi(TM) Processor x200 Family (codename: Knights Landing) has an erratum where a processor thread setting the Accessed or Dirty bits may not do so atomically against its checks for the Present bit. This may cause a thread (which is about to page fault) to set A and/or D, even though the Present bit had already been atomically cleared. These bits are truly "stray". In the case of the Dirty bit, the thread associated with the stray set was *not* allowed to write to the page. This means that we do not have to launder the bit(s); we can simply ignore them. If the PTE is used for storing a swap index or a NUMA migration index, the A bit could be misinterpreted as part of the swap type. The stray bits being set cause a software-cleared PTE to be interpreted as a swap entry. In some cases (like when the swap index ends up being for a non-existent swapfile), the kernel detects the stray value and WARN()s about it, but there is no guarantee that the kernel can always detect it. 
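A toy illustration of the layout point made above, using the bit positions quoted in the swap-offset patch earlier in this series (standalone user-space arithmetic, not kernel code): the Accessed bit is PTE bit 5, which sat inside the old TYPE(1-5) field, so a stray A bit silently changes the decoded swap type; the new macros start the type at bit 9, out of reach of A/D.

#include <stdio.h>
#include <stdint.h>

#define _PAGE_ACCESSED	(1ULL << 5)
#define _PAGE_DIRTY	(1ULL << 6)

#define OLD_SWP_TYPE(v)	(((v) >> 1) & 0x1f)	/* old layout: type in bits 1-5 */
#define NEW_SWP_TYPE(v)	(((v) >> 9) & 0x1f)	/* new layout: type in bits 9-13, per the new macros */

int main(void)
{
	uint64_t old_entry = (1ULL << 1) | (0x1234ULL << 9);	/* old encoding: type 1, offset at bit 9 */
	uint64_t new_entry = (1ULL << 9) | (0x1234ULL << 15);	/* new encoding: type 1, offset above the type */

	printf("old layout: type %llu -> %llu once A/D are stray-set\n",
	       (unsigned long long)OLD_SWP_TYPE(old_entry),
	       (unsigned long long)OLD_SWP_TYPE(old_entry | _PAGE_ACCESSED | _PAGE_DIRTY));
	printf("new layout: type %llu -> %llu once A/D are stray-set\n",
	       (unsigned long long)NEW_SWP_TYPE(new_entry),
	       (unsigned long long)NEW_SWP_TYPE(new_entry | _PAGE_ACCESSED | _PAGE_DIRTY));
	return 0;
}
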
When we have 64-bit PTEs (64-bit mode or 32-bit PAE), we were able to move the swap PTE format around to avoid these troublesome bits. But, 32-bit non-PAE is tight on bits. So, disallow it from running on this hardware. I can't imagine anyone wanting to run 32-bit non-highmem kernels on this hardware, but disallowing them from running entirely is surely the safe thing to do. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001914.D0B50110@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/boot.h | 1 + arch/x86/boot/cpu.c | 2 ++ arch/x86/boot/cpucheck.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/boot/cpuflags.c | 1 + arch/x86/boot/cpuflags.h | 1 + 5 files changed, 38 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88353de..a5ce666a2423 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -294,6 +294,7 @@ static inline int cmdline_find_option_bool(const char *option) /* cpu.c, cpucheck.c */ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); +int check_knl_erratum(void); int validate_cpu(void); /* early_serial_console.c */ diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 29207f69ae8c..26240dde081e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -93,6 +93,8 @@ int validate_cpu(void) show_cap_strs(err_flags); putchar('\n'); return -1; + } else if (check_knl_erratum()) { + return -1; } else { return 0; } diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 1fd7d575092e..4ad7d70e8739 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -24,6 +24,7 @@ # include "boot.h" #endif #include +#include #include #include #include @@ -175,6 +176,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n"); } } + if (!err) + err = check_knl_erratum(); if (err_flags_ptr) *err_flags_ptr = err ? err_flags : NULL; @@ -185,3 +188,33 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) return (cpu.level < req_level || err) ? -1 : 0; } + +int check_knl_erratum(void) +{ + /* + * First check for the affected model/family: + */ + if (!is_intel() || + cpu.family != 6 || + cpu.model != INTEL_FAM6_XEON_PHI_KNL) + return 0; + + /* + * This erratum affects the Accessed/Dirty bits, and can + * cause stray bits to be set in !Present PTEs. We have + * enough bits in our 64-bit PTEs (which we have on real + * 64-bit mode or PAE) to avoid using these troublesome + * bits. But, we do not have enough space in our 32-bit + * PTEs. So, refuse to run on 32-bit non-PAE kernels. + */ + if (IS_ENABLED(CONFIG_X86_64) || IS_ENABLED(CONFIG_X86_PAE)) + return 0; + + puts("This 32-bit kernel can not run on this Xeon Phi x200\n" + "processor due to a processor erratum. 
Use a 64-bit\n" + "kernel, or enable PAE in this 32-bit kernel.\n\n"); + + return -1; +} + + diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c index 431fa5f84537..6687ab953257 100644 --- a/arch/x86/boot/cpuflags.c +++ b/arch/x86/boot/cpuflags.c @@ -102,6 +102,7 @@ void get_cpuflags(void) cpuid(0x1, &tfms, &ignored, &cpu.flags[4], &cpu.flags[0]); cpu.level = (tfms >> 8) & 15; + cpu.family = cpu.level; cpu.model = (tfms >> 4) & 15; if (cpu.level >= 6) cpu.model += ((tfms >> 16) & 0xf) << 4; diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h index 4cb404fd45ce..15ad56a3f905 100644 --- a/arch/x86/boot/cpuflags.h +++ b/arch/x86/boot/cpuflags.h @@ -6,6 +6,7 @@ struct cpu_features { int level; /* Family, or 64 for x86-64 */ + int family; /* Family, always */ int model; u32 flags[NCAPINTS]; }; -- cgit v1.2.3 From dcb32d9913b7ed527b135a7e221f8d14b67bb952 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:15 -0700 Subject: x86/mm: Use pte_none() to test for empty PTE The page table manipulation code seems to have grown a couple of sites that are looking for empty PTEs. Just in case one of these entries got a stray bit set, use pte_none() instead of checking for a zero pte_val(). The use pte_same() makes me a bit nervous. If we were doing a pte_same() check against two cleared entries and one of them had a stray bit set, it might fail the pte_same() check. But, I don't think we ever _do_ pte_same() for cleared entries. It is almost entirely used for checking for races in fault-in paths. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001915.813703D9@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 12 ++++++------ arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pgtable_32.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bce2e5d9edd4..bb88fbc0a288 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -354,7 +354,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. 
*/ - if (pte_val(*pte)) { + if (!pte_none(*pte)) { if (!after_bootmem) pages++; continue; @@ -396,7 +396,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, continue; } - if (pmd_val(*pmd)) { + if (!pmd_none(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); @@ -470,7 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, continue; } - if (pud_val(*pud)) { + if (!pud_none(*pud)) { if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); last_map_addr = phys_pmd_init(pmd, addr, end, @@ -673,7 +673,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; - if (pte_val(*pte)) + if (!pte_none(*pte)) return; } @@ -691,7 +691,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; - if (pmd_val(*pmd)) + if (!pmd_none(*pmd)) return; } @@ -710,7 +710,7 @@ static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; - if (pud_val(*pud)) + if (!pud_none(*pud)) return false; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7a1f7bbf4105..75142159b0a5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1185,7 +1185,7 @@ repeat: return __cpa_process_fault(cpa, address, primary); old_pte = *kpte; - if (!pte_val(old_pte)) + if (pte_none(old_pte)) return __cpa_process_fault(cpa, address, primary); if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 75cc0978d45d..e67ae0e6c59d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -47,7 +47,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) return; } pte = pte_offset_kernel(pmd, vaddr); - if (pte_val(pteval)) + if (!pte_none(pteval)) set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); -- cgit v1.2.3 From 4d581259b7d44c8120a614b4e9244094c824d51f Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 14 Jul 2016 18:05:56 +0800 Subject: x86/reboot: Add Dell Optiplex 7450 AIO reboot quirk Dell Optiplex 7450 AIO works with BOOT_ACPI; however, the quirk for "OptiPlex 745" changes its boot method to BOOT_BIOS and causes 7450 AIO hangs when rebooting; as a result, 7450 AIO is appended to overwrite BOOT_BIOS by BOOT_ACPI in order not to break the original 745 series Signed-off-by: Alex Hung Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a9b31eb815f2..15ed70f8278b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -54,6 +54,19 @@ bool port_cf9_safe = false; * Dell Inc. so their systems "just work". :-) */ +/* + * Some machines require the "reboot=a" commandline options + */ +static int __init set_acpi_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_ACPI) { + reboot_type = BOOT_ACPI; + pr_info("%s series board detected. Selecting %s-method for reboots.\n", + d->ident, "ACPI"); + } + return 0; +} + /* * Some machines require the "reboot=b" or "reboot=k" commandline options, * this quirk makes that automatic. 
@@ -395,6 +408,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 7450 AIO */ + .callback = set_acpi_reboot, + .ident = "Dell OptiPlex 7450 AIO", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7450 AIO"), + }, + }, /* Hewlett-Packard */ { /* Handle problems with rebooting on HP laptops */ -- cgit v1.2.3 From 2ba78056acfe8d63a29565f91dae4678ed6b81ca Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 14 Jul 2016 12:06:53 -0700 Subject: kasan: add newline to messages Currently GPF messages with KASAN look as follows: kasan: GPF could be caused by NULL-ptr deref or user memory accessgeneral protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Add newlines. Link: http://lkml.kernel.org/r/1467294357-98002-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kasan_init_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1b1110fa0057..0493c17b8a51 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -54,8 +54,8 @@ static int kasan_die_handler(struct notifier_block *self, void *data) { if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + pr_emerg("CONFIG_KASAN_INLINE enabled\n"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); } return NOTIFY_OK; } -- cgit v1.2.3 From 9babed6a66b5577628d9e76e5a6cb6104d7ddd4c Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 14 Jul 2016 12:07:43 -0700 Subject: m32r: fix build warning about putc We were getting build warning: arch/m32r/boot/compressed/m32r_sio.c:11:13: warning: conflicting types for built-in function 'putc' Here putc is used as a static function so lets just rename it to avoid the conflict with the builtin putc. 
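For reference, a minimal repro of this class of clash (any translation unit, no headers needed; the exact diagnostic wording varies with the GCC version):

/* cc -c putc-clash.c
 * warning: conflicting types for built-in function 'putc'
 *
 * GCC treats 'putc' as a builtin with the libc prototype int putc(int, FILE *),
 * so a private helper with a different signature trips the warning. */
static void putc(char c)
{
	(void)c;
}

/* Renaming the helper, as the patch above does, avoids the clash entirely. */
static void m32r_putc(char c)
{
	(void)c;
}
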
Link: http://lkml.kernel.org/r/1466977046-24724-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/boot/compressed/m32r_sio.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/m32r/boot/compressed/m32r_sio.c b/arch/m32r/boot/compressed/m32r_sio.c index 01d877c6868f..cf3023dced49 100644 --- a/arch/m32r/boot/compressed/m32r_sio.c +++ b/arch/m32r/boot/compressed/m32r_sio.c @@ -8,12 +8,13 @@ #include -static void putc(char c); +static void m32r_putc(char c); static int puts(const char *s) { char c; - while ((c = *s++)) putc(c); + while ((c = *s++)) + m32r_putc(c); return 0; } @@ -41,7 +42,7 @@ static int puts(const char *s) #define BOOT_SIO0TXB PLD_ESIO0TXB #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*BOOT_SIO0STS & 0x3) != 0x3) cpu_relax(); @@ -61,7 +62,7 @@ static void putc(char c) #define SIO0TXB (volatile unsigned short *)(0x00efd000 + 30) #endif -static void putc(char c) +static void m32r_putc(char c) { while ((*SIO0STS & 0x1) == 0) cpu_relax(); -- cgit v1.2.3 From af2cf278ef4f9289f88504c3e03cb12f76027575 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Jul 2016 13:22:49 -0700 Subject: x86/mm/hotplug: Don't remove PGD entries in remove_pagetable() So when memory hotplug removes a piece of physical memory from pagetable mappings, it also frees the underlying PGD entry. This complicates PGD management, so don't do this. We can keep the PGD mapped and the PUD table all clear - it's only a single 4K page per 512 GB of memory hotplugged. Signed-off-by: Ingo Molnar Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/064ff6c7275734537f969e876f6cd0baa954d2cc.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb88fbc0a288..e14f87057c3f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -702,27 +702,6 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) spin_unlock(&init_mm.page_table_lock); } -/* Return true if pgd is changed, otherwise return false. 
*/ -static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) -{ - pud_t *pud; - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) { - pud = pud_start + i; - if (!pud_none(*pud)) - return false; - } - - /* free a pud table */ - free_pagetable(pgd_page(*pgd), 0); - spin_lock(&init_mm.page_table_lock); - pgd_clear(pgd); - spin_unlock(&init_mm.page_table_lock); - - return true; -} - static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, bool direct) @@ -913,7 +892,6 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) unsigned long addr; pgd_t *pgd; pud_t *pud; - bool pgd_changed = false; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); @@ -924,13 +902,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) pud = (pud_t *)pgd_page_vaddr(*pgd); remove_pud_table(pud, addr, next, direct); - if (free_pud_table(pud, pgd)) - pgd_changed = true; } - if (pgd_changed) - sync_global_pgds(start, end - 1, 1); - flush_tlb_all(); } -- cgit v1.2.3 From 360cb4d15567a7eca07a5f3ade6de308bbfb4e70 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:50 -0700 Subject: x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated This avoids pointless races in which another CPU or task might see a partially populated global PGD entry. These races should normally be harmless, but, if another CPU propagates the entry via vmalloc_fault() and then populate_pgd() fails (due to memory allocation failure, for example), this prevents a use-after-free of the PGD entry. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/bf99df27eac6835f687005364bd1fbd89130946c.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 75142159b0a5..26aa487ae4ef 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1104,8 +1104,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); if (!pud) return -1; - - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1113,11 +1111,16 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - unmap_pgd_range(cpa->pgd, addr, + if (pud) + free_page((unsigned long)pud); + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } + if (pud) + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); + cpa->numpages = ret; return 0; } -- cgit v1.2.3 From d92fc69ccac4c0a20679fdbdc81b2010685a6f33 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:51 -0700 Subject: x86/mm: Remove kernel_unmap_pages_in_pgd() and efi_cleanup_page_tables() kernel_unmap_pages_in_pgd() is dangerous: if a PGD entry in init_mm.pgd were to be cleared, callers would need to ensure that the pgd entry hadn't been propagated to any other pgd. Its only caller was efi_cleanup_page_tables(), and that, in turn, was unused, so just delete both functions. This leaves a couple of other helpers unused, so delete them, too. 
Signed-off-by: Andy Lutomirski Reviewed-by: Matt Fleming Acked-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/77ff20fdde3b75cd393be5559ad8218870520248.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 1 - arch/x86/include/asm/pgtable_types.h | 2 -- arch/x86/mm/pageattr.c | 28 ---------------------------- arch/x86/platform/efi/efi.c | 2 -- arch/x86/platform/efi/efi_32.c | 3 --- arch/x86/platform/efi/efi_64.c | 5 ----- 6 files changed, 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..45ea38df86d4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -125,7 +125,6 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d14d0a55322a..f1218f512f62 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -481,8 +481,6 @@ extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 26aa487ae4ef..26c93c6e04a0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -746,18 +746,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } -static bool try_to_free_pud_page(pud_t *pud) -{ - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) - if (!pud_none(pud[i])) - return false; - - free_page((unsigned long)pud); - return true; -} - static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -871,16 +859,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } -static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) -{ - pgd_t *pgd_entry = root + pgd_index(addr); - - unmap_pud_range(pgd_entry, addr, end); - - if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) - pgd_clear(pgd_entry); -} - static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -1994,12 +1972,6 @@ out: return retval; } -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages) -{ - unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); -} - /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..62986e5fbdba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -978,8 +978,6 @@ static void __init __efi_enter_virtual_mode(void) * EFI mixed mode we need all of memory to be accessible when * we pass parameters to the EFI runtime services in the * thunking code. - * - * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ free_pages((unsigned long)new_memmap, pg_shift); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 338402b91d2e..cef39b097649 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -49,9 +49,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ -} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b226b3f497f1..d288dcea1ffe 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -285,11 +285,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ - kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); -} - static void __init __map_region(efi_memory_desc_t *md, u64 va) { unsigned long flags = _PAGE_RW; -- cgit v1.2.3 From 9a2e9da3e003112399f2863b7b6b911043c01895 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:52 -0700 Subject: x86/dumpstack: Try harder to get a call trace on stack overflow If we overflow the stack, print_context_stack() will abort. Detect this case and rewind back into the valid part of the stack so that we can trace it. Signed-off-by: Andy Lutomirski Reviewed-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/ee1690eb2715ccc5dc187fde94effa4ca0ccbbcd.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ef8017ca5ba9..cc88e25d73e9 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -87,7 +87,7 @@ static inline int valid_stack_ptr(struct task_struct *task, else return 0; } - return p > t && p < t + THREAD_SIZE - size; + return p >= t && p < t + THREAD_SIZE - size; } unsigned long @@ -98,6 +98,14 @@ print_context_stack(struct task_struct *task, { struct stack_frame *frame = (struct stack_frame *)bp; + /* + * If we overflowed the stack into a guard page, jump back to the + * bottom of the usable stack. + */ + if ((unsigned long)task_stack_page(task) - (unsigned long)stack < + PAGE_SIZE) + stack = (unsigned long *)task_stack_page(task); + while (valid_stack_ptr(task, stack, sizeof(*stack), end)) { unsigned long addr; -- cgit v1.2.3 From 98f30b1207932b6553ea605c99393d8afca12324 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:53 -0700 Subject: x86/dumpstack/64: Handle faults when printing the "Stack: " part of an OOPS If we overflow the stack into a guard page, we'll recursively fault when trying to dump the contents of the guard page. Use probe_kernel_address() so we can recover if this happens. 
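A condensed sketch of the pattern being applied here (kernel context assumed; probe_kernel_address() copies the word out and returns 0 on success, or an error if the access would fault, e.g. on a guard page):

#include <linux/kernel.h>
#include <linux/uaccess.h>

/* Walk up to 'n' words of a possibly-overflowed stack without faulting. */
static void show_stack_words_safely(unsigned long *stack, int n)
{
	int i;

	for (i = 0; i < n; i++, stack++) {
		unsigned long word;

		if (probe_kernel_address(stack, word))
			break;		/* unreadable: stop instead of recursing into a fault */
		pr_cont(" %016lx", word);
	}
	pr_cont("\n");
}
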
Signed-off-by: Andy Lutomirski Reviewed-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e626d47a55d7b04dcb1b4d33faa95e8505b217c8.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack_64.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index d558a8a49016..2552a1eadfed 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -272,6 +272,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { + unsigned long word; + if (stack >= irq_stack && stack <= irq_stack_end) { if (stack == irq_stack_end) { stack = (unsigned long *) (irq_stack_end[-1]); @@ -281,12 +283,18 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, if (kstack_end(stack)) break; } + + if (probe_kernel_address(stack, word)) + break; + if ((i % STACKSLOTS_PER_LINE) == 0) { if (i != 0) pr_cont("\n"); - printk("%s %016lx", log_lvl, *stack++); + printk("%s %016lx", log_lvl, word); } else - pr_cont(" %016lx", *stack++); + pr_cont(" %016lx", word); + + stack++; touch_nmi_watchdog(); } preempt_enable(); -- cgit v1.2.3 From 46aea3873401836abb7f01200e7946e7d518b359 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:54 -0700 Subject: x86/mm/64: In vmalloc_fault(), use CR3 instead of current->active_mm If we get a vmalloc fault while current->active_mm->pgd doesn't match CR3, we'll crash without this change. I've seen this failure mode on heavily instrumented kernels with virtually mapped stacks. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4650d7674185f165ed8fdf9ac4c5c35c5c179ba8.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d1fa7cd2374..ca44e2e7fd00 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -439,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address) * happen within a race in page table update. In the later * case just flush: */ - pgd = pgd_offset(current->active_mm, address); + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; -- cgit v1.2.3 From 2deb4be28077638591fe5fc593b7f8aabc140f42 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:55 -0700 Subject: x86/dumpstack: When OOPSing, rewind the stack before do_exit() If we call do_exit() with a clean stack, we greatly reduce the risk of recursive oopses due to stack overflow in do_exit, and we allow do_exit to work even if we OOPS from an IST stack. The latter gives us a much better chance of surviving long enough after we detect a stack overflow to write out our logs. Signed-off-by: Andy Lutomirski Reviewed-by: Josh Poimboeuf Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/32f73ceb372ec61889598da5e5b145889b9f2e19.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 11 +++++++++++ arch/x86/entry/entry_64.S | 11 +++++++++++ arch/x86/kernel/dumpstack.c | 10 +++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 983e5d3a0d27..0b56666e6039 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1153,3 +1153,14 @@ ENTRY(async_page_fault) jmp error_code END(async_page_fault) #endif + +ENTRY(rewind_stack_do_exit) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp + + call do_exit +1: jmp 1b +END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9ee0da1807ed..b846875aeea6 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1423,3 +1423,14 @@ ENTRY(ignore_sysret) mov $-ENOSYS, %eax sysret END(ignore_sysret) + +ENTRY(rewind_stack_do_exit) + /* Prevent any naive code from trying to unwind to our caller. */ + xorl %ebp, %ebp + + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + leaq -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%rax), %rsp + + call do_exit +1: jmp 1b +END(rewind_stack_do_exit) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index cc88e25d73e9..de8242d8bb61 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -234,6 +234,8 @@ unsigned long oops_begin(void) EXPORT_SYMBOL_GPL(oops_begin); NOKPROBE_SYMBOL(oops_begin); +void __noreturn rewind_stack_do_exit(int signr); + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) @@ -255,7 +257,13 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + + /* + * We're not going to return, but we might be on an IST stack or + * have very little stack space left. Rewind the stack and kill + * the task. + */ + rewind_stack_do_exit(signr); } NOKPROBE_SYMBOL(oops_end); -- cgit v1.2.3 From dfa9a942fd7951c8f333cf3f377dde51ebd21685 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:56 -0700 Subject: x86/uaccess: Move thread_info::uaccess_err and thread_info::sig_on_uaccess_err to thread_struct struct thread_info is a legacy mess. To prepare for its partial removal, move the uaccess control fields out -- they're straightforward. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d0ac4d01c8e4d4d756264604e47445d5acc7900e.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 6 +++--- arch/x86/include/asm/processor.h | 3 +++ arch/x86/include/asm/thread_info.h | 2 -- arch/x86/include/asm/uaccess.h | 4 ++-- arch/x86/mm/extable.c | 2 +- arch/x86/mm/fault.c | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 174c2549939d..3aba2b043050 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. */ - prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; - current_thread_info()->sig_on_uaccess_error = 1; + prev_sig_on_uaccess_error = current->thread.sig_on_uaccess_error; + current->thread.sig_on_uaccess_error = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_error = prev_sig_on_uaccess_error; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3cc5d3..f53ae57bd985 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -419,6 +419,9 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + unsigned int sig_on_uaccess_error:1; + unsigned int uaccess_err:1; /* uaccess failed */ + /* Floating point and extended processor state */ struct fpu fpu; /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133ac05cd..7c47bb659ecd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -58,8 +58,6 @@ struct thread_info { __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ mm_segment_t addr_limit; - unsigned int sig_on_uaccess_error:1; - unsigned int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d40ec723f799..8f66e5655c23 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -487,13 +487,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - current_thread_info()->uaccess_err = 0; \ + current->thread.uaccess_err = 0; \ __uaccess_begin(); \ barrier(); #define uaccess_catch(err) \ __uaccess_end(); \ - (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ + (err) |= (current->thread.uaccess_err ? 
-EFAULT : 0); \ } while (0) /** diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4bb53b89f3c5..0f90cc218d04 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -37,7 +37,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { /* Special hack for uaccess_err */ - current_thread_info()->uaccess_err = 1; + current->thread.uaccess_err = 1; regs->ip = ex_fixup_addr(fixup); return true; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ca44e2e7fd00..69be03d4aca6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current_thread_info()->sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_error && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; -- cgit v1.2.3 From 2a53ccbc0de1b1950aeedd24680f7eca65c86ff5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Jul 2016 10:21:11 +0200 Subject: x86/dumpstack: Rename thread_struct::sig_on_uaccess_error to sig_on_uaccess_err Rename it to match the thread_struct::uaccess_err pattern and also because it was too long. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 10 +++++----- arch/x86/include/asm/processor.h | 2 +- arch/x86/mm/fault.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 3aba2b043050..75fc719b7f31 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) { /* * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_error, this could go away. + * sig_on_uaccess_err, this could go away. */ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { @@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_error; + int prev_sig_on_uaccess_err; long ret; /* @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. 
*/ - prev_sig_on_uaccess_error = current->thread.sig_on_uaccess_error; - current->thread.sig_on_uaccess_error = 1; + prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; + current->thread.sig_on_uaccess_err = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current->thread.sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f53ae57bd985..cbdfe5f76347 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -419,7 +419,7 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - unsigned int sig_on_uaccess_error:1; + unsigned int sig_on_uaccess_err:1; unsigned int uaccess_err:1; /* uaccess failed */ /* Floating point and extended processor state */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 69be03d4aca6..d22161ab941d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current->thread.sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_err && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; -- cgit v1.2.3 From 13d4ea097d18b419ad2a2b696063d44bf59acec0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:57 -0700 Subject: x86/uaccess: Move thread_info::addr_limit to thread_struct struct thread_info is a legacy mess. To prepare for its partial removal, move thread_info::addr_limit out. As an added benefit, this way is simpler. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/15bee834d09402b47ac86f2feccdf6529f9bc5b0.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/checksum_32.h | 3 +-- arch/x86/include/asm/processor.h | 17 ++++++++++------- arch/x86/include/asm/thread_info.h | 7 ------- arch/x86/include/asm/uaccess.h | 6 +++--- arch/x86/kernel/asm-offsets.c | 4 +++- arch/x86/lib/copy_user_64.S | 8 ++++---- arch/x86/lib/csum-wrappers_64.c | 1 + arch/x86/lib/getuser.S | 20 ++++++++++---------- arch/x86/lib/putuser.S | 10 +++++----- arch/x86/lib/usercopy_64.c | 2 +- drivers/pnp/isapnp/proc.c | 2 +- lib/bitmap.c | 2 +- 12 files changed, 40 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 532f85e6651f..7b53743ed267 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -2,8 +2,7 @@ #define _ASM_X86_CHECKSUM_32_H #include - -#include +#include /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cbdfe5f76347..89314ed74fee 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -371,6 +371,10 @@ extern unsigned int xstate_size; struct perf_event; +typedef struct { + unsigned long seg; +} mm_segment_t; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -419,6 +423,8 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + mm_segment_t addr_limit; + unsigned int sig_on_uaccess_err:1; unsigned int uaccess_err:1; /* uaccess failed */ @@ -493,11 +499,6 @@ static inline void load_sp0(struct tss_struct *tss, #define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT */ -typedef struct { - unsigned long seg; -} mm_segment_t; - - /* Free all resources held by a thread. 
*/ extern void release_thread(struct task_struct *); @@ -719,6 +720,7 @@ static inline void spin_lock_prefetch(const void *x) .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ + .addr_limit = KERNEL_DS, \ } extern unsigned long thread_saved_pc(struct task_struct *tsk); @@ -768,8 +770,9 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX TASK_SIZE_MAX -#define INIT_THREAD { \ - .sp0 = TOP_OF_INIT_STACK \ +#define INIT_THREAD { \ + .sp0 = TOP_OF_INIT_STACK, \ + .addr_limit = KERNEL_DS, \ } /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 7c47bb659ecd..89bff044a6f5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,7 +57,6 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - mm_segment_t addr_limit; }; #define INIT_THREAD_INFO(tsk) \ @@ -65,7 +64,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .addr_limit = KERNEL_DS, \ } #define init_thread_info (init_thread_union.thread_info) @@ -184,11 +182,6 @@ static inline unsigned long current_stack_pointer(void) # define cpu_current_top_of_stack (cpu_tss + TSS_sp0) #endif -/* Load thread_info address into "reg" */ -#define GET_THREAD_INFO(reg) \ - _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ - _ASM_SUB $(THREAD_SIZE),reg ; - /* * ASM operand which evaluates to a 'thread_info' address of * the current task, if it is known that "reg" is exactly "off" diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8f66e5655c23..c03bfb68c503 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -29,12 +29,12 @@ #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) #define get_ds() (KERNEL_DS) -#define get_fs() (current_thread_info()->addr_limit) -#define set_fs(x) (current_thread_info()->addr_limit = (x)) +#define get_fs() (current->thread.addr_limit) +#define set_fs(x) (current->thread.addr_limit = (x)) #define segment_eq(a, b) ((a).seg == (b).seg) -#define user_addr_max() (current_thread_info()->addr_limit.seg) +#define user_addr_max() (current->thread.addr_limit.seg) #define __addr_ok(addr) \ ((unsigned long __force)(addr) < user_addr_max()) diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 674134e9f5e5..2bd5c6ff7ee7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -31,7 +31,9 @@ void common(void) { BLANK(); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); - OFFSET(TI_addr_limit, thread_info, addr_limit); + + BLANK(); + OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 2b0ef26da0bd..bf603ebbfd8e 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -17,11 +17,11 @@ /* Standard copy_to_user with segment limit checking */ ENTRY(_copy_to_user) - GET_THREAD_INFO(%rax) + mov PER_CPU_VAR(current_task), %rax movq %rdi,%rcx addq %rdx,%rcx jc bad_to_user - cmpq TI_addr_limit(%rax),%rcx + cmpq TASK_addr_limit(%rax),%rcx ja bad_to_user ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ "jmp copy_user_generic_string", \ @@ -32,11 +32,11 @@ ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) - GET_THREAD_INFO(%rax) + mov 
PER_CPU_VAR(current_task), %rax movq %rsi,%rcx addq %rdx,%rcx jc bad_from_user - cmpq TI_addr_limit(%rax),%rcx + cmpq TASK_addr_limit(%rax),%rcx ja bad_from_user ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ "jmp copy_user_generic_string", \ diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 28a6654f0d08..b6fcb9a9ddbc 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -6,6 +6,7 @@ */ #include #include +#include #include /** diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 46668cda4ffd..0ef5128c2de8 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -35,8 +35,8 @@ .text ENTRY(__get_user_1) - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 1: movzbl (%_ASM_AX),%edx @@ -48,8 +48,8 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) add $1,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 2: movzwl -1(%_ASM_AX),%edx @@ -61,8 +61,8 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) add $3,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 3: movl -3(%_ASM_AX),%edx @@ -75,8 +75,8 @@ ENTRY(__get_user_8) #ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC 4: movq -7(%_ASM_AX),%rdx @@ -86,8 +86,8 @@ ENTRY(__get_user_8) #else add $7,%_ASM_AX jc bad_get_user_8 - GET_THREAD_INFO(%_ASM_DX) - cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + mov PER_CPU_VAR(current_task), %_ASM_DX + cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 ASM_STAC 4: movl -7(%_ASM_AX),%edx diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index e0817a12d323..c891ece81e5b 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -29,14 +29,14 @@ * as they get called from within inline assembly. 
*/ -#define ENTER GET_THREAD_INFO(%_ASM_BX) +#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX #define EXIT ASM_CLAC ; \ ret .text ENTRY(__put_user_1) ENTER - cmp TI_addr_limit(%_ASM_BX),%_ASM_CX + cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user ASM_STAC 1: movb %al,(%_ASM_CX) @@ -46,7 +46,7 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user @@ -58,7 +58,7 @@ ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user @@ -70,7 +70,7 @@ ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER - mov TI_addr_limit(%_ASM_BX),%_ASM_BX + mov TASK_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0a42327a59d7..9f760cdcaf40 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -6,7 +6,7 @@ * Copyright 2002 Andi Kleen */ #include -#include +#include /* * Zero Userspace diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c index 5edee645d890..262285e48a09 100644 --- a/drivers/pnp/isapnp/proc.c +++ b/drivers/pnp/isapnp/proc.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include extern struct pnp_protocol isapnp_protocol; diff --git a/lib/bitmap.c b/lib/bitmap.c index c66da508cbf7..eca88087fa8a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -14,9 +14,9 @@ #include #include #include +#include #include -#include /* * bitmaps provide an array of bits, implemented using an an -- cgit v1.2.3 From fb59831b496a5bb7d0a06c7e702d88d1757edfca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:58 -0700 Subject: x86/smp: Remove stack_smp_processor_id() It serves no purpose -- raw_smp_processor_id() works fine. This change will be needed to move thread_info off the stack. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a2bf4f07fbc30fb32f9f7f3f8f94ad3580823847.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 - arch/x86/include/asm/smp.h | 6 ------ arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 678637ad7476..59d34c521d96 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -17,7 +17,6 @@ static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b057306f40..0576b6157f3a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -172,12 +172,6 @@ extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) #define raw_smp_processor_id() (this_cpu_read(cpu_number)) -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) #define safe_smp_processor_id() smp_processor_id() #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0fe6953f421c..d22a7b9c4f0e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1452,7 +1452,7 @@ void cpu_init(void) struct task_struct *me; struct tss_struct *t; unsigned long v; - int cpu = stack_smp_processor_id(); + int cpu = raw_smp_processor_id(); int i; wait_for_master_cpu(cpu); -- cgit v1.2.3 From eb43e8f85fffc1ba535e0362a872101dfe48abe3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:59 -0700 Subject: x86/smp: Remove unnecessary initialization of thread_info::cpu It's statically initialized to zero -- no need to dynamically initialize it to zero as well. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/6cf6314dce3051371a913ee19d1b88e29c68c560.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..0e91dbeca2fd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1285,7 +1285,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) cpumask_copy(cpu_callin_mask, cpumask_of(0)); mb(); - current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); -- cgit v1.2.3 From 3ebfd81f7fb3e81a754e37283b7f38c62244641a Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Thu, 14 Jul 2016 12:31:53 -0700 Subject: x86/syscalls: Add compat_sys_preadv64v2/compat_sys_pwritev64v2 Don't use the same syscall numbers for 2 different syscalls: 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 534 x32 preadv2 compat_sys_preadv2 535 x32 pwritev2 compat_sys_pwritev2 Add compat_sys_preadv64v2() and compat_sys_pwritev64v2() so that 64-bit offset is passed in one 64-bit register on x32, similar to compat_sys_preadv64() and compat_sys_pwritev64(). Signed-off-by: H.J. 
Lu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Hellwig Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAMe9rOovCMf-RQfx_n1U_Tu_DX1BYkjtFr%3DQ4-_PFVSj9BCzUA@mail.gmail.com Signed-off-by: Ingo Molnar --- arch/x86/entry/syscalls/syscall_64.tbl | 4 ++-- arch/x86/include/asm/unistd.h | 2 ++ fs/read_write.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 555263e385c9..e9ce9c7c39b4 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -374,5 +374,5 @@ 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat/ptregs -534 x32 preadv2 compat_sys_preadv2 -535 x32 pwritev2 compat_sys_pwritev2 +546 x32 preadv2 compat_sys_preadv64v2 +547 x32 pwritev2 compat_sys_pwritev64v2 diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 2b19caa4081c..32712a925f26 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -26,6 +26,8 @@ # define __ARCH_WANT_COMPAT_SYS_GETDENTS64 # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 +# define __ARCH_WANT_COMPAT_SYS_PREADV64V2 +# define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 # endif diff --git a/fs/read_write.c b/fs/read_write.c index 933b53a375b4..66215a7b17cf 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1168,6 +1168,15 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, return do_compat_preadv64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 +COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_preadv64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, @@ -1265,6 +1274,15 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, return do_compat_pwritev64(fd, vec, vlen, pos, 0); } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 +COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos, int, flags) +{ + return do_compat_pwritev64(fd, vec, vlen, pos, flags); +} +#endif + COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) -- cgit v1.2.3 From c48ec42d6eae08f55685ab660f0743ed33b9f22a Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Fri, 15 Jul 2016 16:12:10 +0800 Subject: x86/tsc: Remove the unused check_tsc_disabled() check_tsc_disabled() was introduced by commit: c73deb6aecda ("perf/x86: Add ability to calculate TSC from perf sample timestamps") The only caller was arch_perf_update_userpage(), which had been refactored by commit: d8b11a0cbd1c ("perf/x86: Clean up cap_user_time* setting") ... so no need keep and export it any more. 
Signed-off-by: Wei Jiangang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: a.p.zijlstra@chello.nl Cc: adrian.hunter@intel.com Cc: bp@suse.de Link: http://lkml.kernel.org/r/1468570330-25810-1-git-send-email-weijg.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/tsc.h | 1 - arch/x86/kernel/tsc.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index a30591e1567c..33b6365c22fe 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -35,7 +35,6 @@ extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); -extern int check_tsc_disabled(void); extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2a952fcb1516..a804b5ab32d0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -335,12 +335,6 @@ int check_tsc_unstable(void) } EXPORT_SYMBOL_GPL(check_tsc_unstable); -int check_tsc_disabled(void) -{ - return tsc_disabled; -} -EXPORT_SYMBOL_GPL(check_tsc_disabled); - #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { -- cgit v1.2.3 From 102bb9fef68a21f357dc813d4792666c8295bc35 Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Thu, 14 Jul 2016 10:24:06 +0800 Subject: x86/apic: Remove the unused struct apic::apic_id_mask field The only user verify_local_APIC() had been removed by commit: 4399c03c6780 ("x86/apic: Remove verify_local_APIC()") ... so there is no need to keep it. Signed-off-by: Wei Jiangang Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bsd@redhat.com Cc: david.vrabel@citrix.com Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1468463046-20849-1-git-send-email-weijg.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/apic_noop.c | 1 - arch/x86/kernel/apic/apic_numachip.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 1 - arch/x86/xen/apic.c | 1 - 10 files changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index bc27611fa58f..f5befd4945f2 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -300,7 +300,6 @@ struct apic { unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask, diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 76f89e2b245a..048747778d37 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -181,7 +181,6 @@ static struct apic apic_flat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, @@ -278,7 +277,6 @@ static struct apic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu << 24, 
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 13d19ed58514..2cebf59092d8 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -141,7 +141,6 @@ struct apic apic_noop = { .get_apic_id = noop_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index ab5c2c685a3c..714d4fda0d52 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -269,7 +269,6 @@ static const struct apic apic_numachip1 __refconst = { .get_apic_id = numachip1_get_apic_id, .set_apic_id = numachip1_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, @@ -321,7 +320,6 @@ static const struct apic apic_numachip2 __refconst = { .get_apic_id = numachip2_get_apic_id, .set_apic_id = numachip2_set_apic_id, - .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index cf9bd896c12d..06dbaa458bfe 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -171,7 +171,6 @@ static struct apic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index f316e34abb42..93edfa01b408 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -101,7 +101,6 @@ static struct apic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index aca8b75c1552..24170d0809ba 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -270,7 +270,6 @@ static struct apic apic_x2apic_cluster = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a1242e2c12e6..4f13f54f1b1f 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -126,7 +126,6 @@ static struct apic apic_x2apic_phys = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 29003154fafd..5a58c917179c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -582,7 +582,6 @@ static struct apic __refdata apic_x2apic_uv_x = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFFFFFFFu, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index db52a7fafcc2..44c88ad1841a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -177,7 +177,6 @@ static struct apic xen_pv_apic = { .get_apic_id = xen_get_apic_id, .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. 
*/ - .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, -- cgit v1.2.3 From 6bd80f372371a7b3f5ff13e4e8a560066299c001 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Jun 2016 09:43:00 +0200 Subject: m68k/defconfig: Update defconfigs for v4.7-rc2 Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 4 ++++ arch/m68k/configs/apollo_defconfig | 4 ++++ arch/m68k/configs/atari_defconfig | 4 ++++ arch/m68k/configs/bvme6000_defconfig | 4 ++++ arch/m68k/configs/hp300_defconfig | 4 ++++ arch/m68k/configs/mac_defconfig | 4 ++++ arch/m68k/configs/multi_defconfig | 4 ++++ arch/m68k/configs/mvme147_defconfig | 4 ++++ arch/m68k/configs/mvme16x_defconfig | 4 ++++ arch/m68k/configs/q40_defconfig | 4 ++++ arch/m68k/configs/sun3_defconfig | 4 ++++ arch/m68k/configs/sun3x_defconfig | 4 ++++ 12 files changed, 48 insertions(+) (limited to 'arch') diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 3ee6976f6088..8f5b6f7dd136 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -359,6 +360,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -553,7 +555,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index e96787ffcbce..31bded9c83d4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -512,7 +514,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 083fe6beac14..0d7739e04ae2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -350,6 +351,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -533,7 +535,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 475130c06dcb..2cbb5c465fec 100644 --- 
a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 4339658c200f..96102a42c156 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -341,6 +342,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -514,7 +516,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 831cc8c3a2e2..97d88f7dc5a7 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -357,6 +358,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -536,7 +538,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6377afeb522b..be25ef208f0f 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -390,6 +391,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -616,7 +618,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4304b3d56262..a008344360c9 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -339,6 +340,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m 
CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 074bda4094ff..6735a25f36d4 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -340,6 +341,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -504,7 +506,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 07b9fa8d7f2e..780c6e9f6cf9 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -346,6 +347,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -527,7 +529,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 36e6fae02d45..44693cf361e5 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 903acf929511..ef0071d61158 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -9,6 +9,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -337,6 +338,7 @@ CONFIG_MACVTAP=m CONFIG_IPVLAN=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y @@ -506,7 +508,9 @@ CONFIG_TEST_STRING_HELPERS=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m +CONFIG_TEST_UUID=m CONFIG_TEST_RHASHTABLE=m +CONFIG_TEST_HASH=m CONFIG_TEST_LKM=m CONFIG_TEST_USER_COPY=m 
CONFIG_TEST_BPF=m -- cgit v1.2.3 From 6f6ef07f412c5bfc37cde57e94b1fec789471907 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Jul 2016 11:30:34 +0300 Subject: x86/insn: perf tools: Fix vcvtph2ps instruction decoding vcvtph2ps does not have an immediate operand, so remove the erroneous 'Ib' from its opcode map entry. Add vcvtph2ps to the perf tools new instructions test to verify it. Signed-off-by: Adrian Hunter Acked-by: Ingo Molnar Acked-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Dan Williams Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Thomas Gleixner Cc: X86 ML Link: http://lkml.kernel.org/r/1469003437-32706-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/lib/x86-opcode-map.txt | 2 +- tools/perf/arch/x86/tests/insn-x86-dat-32.c | 10 ++++++---- tools/perf/arch/x86/tests/insn-x86-dat-64.c | 10 ++++++---- tools/perf/arch/x86/tests/insn-x86-dat-src.c | 4 ++++ tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 2 +- 5 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index d388de72eaca..28082de46f0d 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -629,7 +629,7 @@ AVXcode: 2 10: pblendvb Vdq,Wdq (66) 11: 12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) +13: vcvtph2ps Vx,Wx (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: vpermps Vqq,Hqq,Wqq (66),(v) diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index 3b491cfe204e..ca08e6eb988f 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -6,6 +6,8 @@ {{0x0f, 0x31, }, 2, 0, "", "", "0f 31 \trdtsc ",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", "f3 0f 1b 00 \tbndmk (%eax),%bnd0",}, {{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", @@ -309,19 +311,19 @@ {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",}, {{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional", -"f2 e8 fc ff ff ff \tbnd call 3c3 ",}, +"f2 e8 fc ff ff ff \tbnd call 3c8 ",}, {{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", "f2 ff 10 \tbnd call *(%eax)",}, {{0xf2, 0xc3, }, 2, 0, "ret", "indirect", "f2 c3 \tbnd ret ",}, {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", -"f2 e9 fc ff ff ff \tbnd jmp 3ce ",}, +"f2 e9 fc ff ff ff \tbnd jmp 3d3 ",}, {{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional", -"f2 e9 fc ff ff ff \tbnd jmp 3d4 ",}, +"f2 e9 fc ff ff ff \tbnd jmp 3d9 ",}, {{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect", "f2 ff 21 \tbnd jmp *(%ecx)",}, {{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional", -"f2 0f 85 fc ff ff ff \tbnd jne 3de ",}, +"f2 0f 85 fc ff ff ff \tbnd jne 3e3 ",}, {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", "0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 4fe7cce179c4..262d9d25a6fa 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -6,6 +6,8 @@ {{0x0f, 0x31, }, 2, 0, "", "", "0f 31 \trdtsc ",}, +{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "", +"c4 
e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",}, {{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "", "f3 0f 1b 00 \tbndmk (%rax),%bnd0",}, {{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "", @@ -325,19 +327,19 @@ {{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", "0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax,%rcx,1)",}, {{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional", -"f2 e8 00 00 00 00 \tbnd callq 3f6 ",}, +"f2 e8 00 00 00 00 \tbnd callq 3fb ",}, {{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", "67 f2 ff 10 \tbnd callq *(%eax)",}, {{0xf2, 0xc3, }, 2, 0, "ret", "indirect", "f2 c3 \tbnd retq ",}, {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", -"f2 e9 00 00 00 00 \tbnd jmpq 402 ",}, +"f2 e9 00 00 00 00 \tbnd jmpq 407 ",}, {{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional", -"f2 e9 00 00 00 00 \tbnd jmpq 408 ",}, +"f2 e9 00 00 00 00 \tbnd jmpq 40d ",}, {{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect", "67 f2 ff 21 \tbnd jmpq *(%ecx)",}, {{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional", -"f2 0f 85 00 00 00 00 \tbnd jne 413 ",}, +"f2 0f 85 00 00 00 00 \tbnd jne 418 ",}, {{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "", "0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",}, {{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index 41b1b1c62660..3cd677513e9e 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -19,6 +19,10 @@ int main(void) /* Following line is a marker for the awk script - do not change */ asm volatile("rdtsc"); /* Start here */ + /* Test fix for vcvtph2ps in x86-opcode-map.txt */ + + asm volatile("vcvtph2ps %xmm3,%ymm5"); + #ifdef __x86_64__ /* bndmk m64, bnd */ diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index d388de72eaca..28082de46f0d 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -629,7 +629,7 @@ AVXcode: 2 10: pblendvb Vdq,Wdq (66) 11: 12: -13: vcvtph2ps Vx,Wx,Ib (66),(v) +13: vcvtph2ps Vx,Wx (66),(v) 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: vpermps Vqq,Hqq,Wqq (66),(v) -- cgit v1.2.3 From edce21216a8887bf06ba85ee49a00695e44c4341 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Jul 2016 09:53:52 +0200 Subject: x86/boot: Reorganize and clean up the BIOS area reservation code So the reserve_ebda_region() code has accumulated a number of problems over the years that make it really difficult to read and understand: - The calculation of 'lowmem' and 'ebda_addr' is an unnecessarily interleaved mess of first lowmem, then ebda_addr, then lowmem tweaks... - 'lowmem' here means 'super low mem' - i.e. 16-bit addressable memory. In other parts of the x86 code 'lowmem' means 32-bit addressable memory... This makes it super confusing to read. - It does not help at all that we have various memory range markers, half of which are 'start of range', half of which are 'end of range' - but this crucial property is not obvious in the naming at all ... gave me a headache trying to understand all this. - Also, the 'ebda_addr' name sucks: it highlights that it's an address (which is obvious, all values here are addresses!), while it does not highlight that it's the _start_ of the EBDA region ... 
- 'BIOS_LOWMEM_KILOBYTES' says a lot of things, except that this is the only value that is a pointer to a value, not a memory range address! - The function name itself is a misnomer: it says 'reserve_ebda_region()' while its main purpose is to reserve all the firmware ROM typically between 640K and 1MB, while the 'EBDA' part is only a small part of that ... - Likewise, the paravirt quirk flag name 'ebda_search' is misleading as well: this too should be about whether to reserve firmware areas in the paravirt case. - In fact thinking about this as 'end of RAM' is confusing: what this function *really* wants to reserve is firmware data and code areas! Once the thinking is inverted from a mixed 'ram' and 'reserved firmware area' notion to a pure 'reserved area' notion everything becomes a lot clearer. To improve all this rewrite the whole code (without changing the logic): - Firstly invert the naming from 'lowmem end' to 'BIOS reserved area start' and propagate this concept through all the variable names and constants. BIOS_RAM_SIZE_KB_PTR // was: BIOS_LOWMEM_KILOBYTES BIOS_START_MIN // was: INSANE_CUTOFF ebda_start // was: ebda_addr bios_start // was: lowmem BIOS_START_MAX // was: LOWMEM_CAP - Then clean up the name of the function itself by renaming it to reserve_bios_regions() and renaming the ::ebda_search paravirt flag to ::reserve_bios_regions. - Fix up all the comments (fix typos), harmonize and simplify their formulation and remove comments that become unnecessary due to the much better naming all around. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bios_ebda.h | 2 +- arch/x86/include/asm/x86_init.h | 4 +- arch/x86/kernel/ebda.c | 124 +++++++++++++++++++++++++------------- arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/platform-quirks.c | 4 +- 6 files changed, 88 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h index 2b00c776f223..4b7b8e71607e 100644 --- a/arch/x86/include/asm/bios_ebda.h +++ b/arch/x86/include/asm/bios_ebda.h @@ -17,7 +17,7 @@ static inline unsigned int get_bios_ebda(void) return address; /* 0 means none */ } -void reserve_ebda_region(void); +void reserve_bios_regions(void); #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION /* diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 4dcdf74dfed8..c519c052700a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -168,14 +168,14 @@ struct x86_legacy_devices { * struct x86_legacy_features - legacy x86 features * * @rtc: this device has a CMOS real-time clock present - * @ebda_search: it's safe to search for the EBDA signature in the hardware's + * @reserve_bios_regions: it's safe to search for the EBDA signature in the hardware's * low RAM * @devices: legacy x86 devices, refer to struct x86_legacy_devices * documentation for further details. */ struct x86_legacy_features { int rtc; - int ebda_search; + int reserve_bios_regions; struct x86_legacy_devices devices; }; diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index afe65dffee80..6219eef20e2e 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -6,66 +6,104 @@ #include /* + * This function reserves all conventional PC system BIOS related + * firmware memory areas (some of which are data, some of which + * are code), that must not be used by the kernel as available + * RAM. 
+ * * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of - * conventional memory (int 0x12) too. This also contains a - * workaround for Dell systems that neglect to reserve EBDA. - * The same workaround also avoids a problem with the AMD768MPX - * chipset: reserve a page before VGA to prevent PCI prefetch - * into it (errata #56). Usually the page is reserved anyways, - * unless you have no PS/2 mouse plugged in. + * conventional memory (int 0x12) too. + * + * This means that as a first approximation on most systems we can + * guess the reserved BIOS area by looking at the low BIOS RAM size + * value and assume that everything above that value (up to 1MB) is + * reserved. + * + * But life in firmware country is not that simple: + * + * - This code also contains a quirk for Dell systems that neglect + * to reserve the EBDA area in the 'RAM size' value ... + * + * - The same quirk also avoids a problem with the AMD768MPX + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). (Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in.) + * + * - Plus paravirt systems don't have a reliable value in the + * 'BIOS RAM size' pointer we can rely on, so we must quirk + * them too. + * + * Due to those various problems this function is deliberately + * very conservative and tries to err on the side of reserving + * too much, to not risk reserving too little. + * + * Losing a small amount of memory in the bottom megabyte is + * rarely a problem, as long as we have enough memory to install + * the SMP bootup trampoline which *must* be in this area. * - * This functions is deliberately very conservative. Losing - * memory in the bottom megabyte is rarely a problem, as long - * as we have enough memory to install the trampoline. Using - * memory that is in use by the BIOS or by some DMA device - * the BIOS didn't shut down *is* a big problem. + * Using memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem to the kernel, + * obviously. */ -#define BIOS_LOWMEM_KILOBYTES 0x413 -#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ -#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ +#define BIOS_RAM_SIZE_KB_PTR 0x413 -void __init reserve_ebda_region(void) +#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ +#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ + +void __init reserve_bios_regions(void) { - unsigned int lowmem, ebda_addr; + unsigned int bios_start, ebda_start; /* - * To determine the position of the EBDA and the - * end of conventional memory, we need to look at - * the BIOS data area. In a paravirtual environment - * that area is absent. We'll just have to assume - * that the paravirt case can handle memory setup - * correctly, without our help. + * NOTE: In a paravirtual environment the BIOS reserved + * area is absent. We'll just have to assume that the + * paravirt case can handle memory setup correctly, + * without our help. 
*/ - if (!x86_platform.legacy.ebda_search) + if (!x86_platform.legacy.reserve_bios_regions) return; - /* end of low (conventional) memory */ - lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES); - lowmem <<= 10; - - /* start of EBDA area */ - ebda_addr = get_bios_ebda(); + /* Get the start address of the EBDA page: */ + ebda_start = get_bios_ebda(); /* - * Note: some old Dells seem to need 4k EBDA without - * reporting so, so just consider the memory above 0x9f000 - * to be off limits (bugzilla 2990). + * Quirk: some old Dells seem to have a 4k EBDA without + * reporting so in their BIOS RAM size value, so just + * consider the memory above 640K to be off limits + * (bugzilla 2990). + * + * We detect this case by filtering for nonsensical EBDA + * addresses below 128K, where we can assume that they + * are bogus and bump it up to a fixed 640K value: */ + if (ebda_start < BIOS_START_MIN) + ebda_start = BIOS_START_MAX; - /* If the EBDA address is below 128K, assume it is bogus */ - if (ebda_addr < INSANE_CUTOFF) - ebda_addr = LOWMEM_CAP; + /* + * BIOS RAM size is encoded in kilobytes, convert it + * to bytes to get a first guess at where the BIOS + * firmware area starts: + */ + bios_start = *(unsigned short *)__va(BIOS_RAM_SIZE_KB_PTR); + bios_start <<= 10; - /* If lowmem is less than 128K, assume it is bogus */ - if (lowmem < INSANE_CUTOFF) - lowmem = LOWMEM_CAP; + /* + * If bios_start is less than 128K, assume it is bogus + * and bump it up to 640K: + */ + if (bios_start < BIOS_START_MIN) + bios_start = BIOS_START_MAX; - /* Use the lower of the lowmem and EBDA markers as the cutoff */ - lowmem = min(lowmem, ebda_addr); - lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ + /* + * Use the lower of the bios_start and ebda_start + * as the starting point, but don't allow it to + * go beyond 640K: + */ + bios_start = min(bios_start, ebda_start); + bios_start = min(bios_start, BIOS_START_MAX); - /* reserve all memory between lowmem and the 1MB mark */ - memblock_reserve(lowmem, 0x100000 - lowmem); + /* Reserve all memory between bios_start and the 1MB mark: */ + memblock_reserve(bios_start, 0x100000 - bios_start); } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index d784bb547a9d..2dda0bc4576e 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -26,7 +26,7 @@ static void __init i386_default_early_setup(void) x86_init.resources.reserve_resources = i386_reserve_resources; x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; - reserve_ebda_region(); + reserve_bios_regions(); } asmlinkage __visible void __init i386_start_kernel(void) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b72fb0b71dd1..99d48e7d2974 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -183,7 +183,7 @@ void __init x86_64_start_reservations(char *real_mode_data) copy_bootdata(__va(real_mode_data)); x86_early_init_platform_quirks(); - reserve_ebda_region(); + reserve_bios_regions(); switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_INTEL_MID: diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index b2f8a33b36ff..24a50301f150 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -7,12 +7,12 @@ void __init x86_early_init_platform_quirks(void) { x86_platform.legacy.rtc = 1; - x86_platform.legacy.ebda_search = 0; + x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 1; switch (boot_params.hdr.hardware_subarch) { case 
X86_SUBARCH_PC: - x86_platform.legacy.ebda_search = 1; + x86_platform.legacy.reserve_bios_regions = 1; break; case X86_SUBARCH_XEN: case X86_SUBARCH_LGUEST: -- cgit v1.2.3 From 25af37f4e1e0a747824e3713b80d6b97dad28b7c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 20 Jul 2016 11:30:35 +0300 Subject: x86/insn: Add AVX-512 support to the instruction decoder Add support for Intel's AVX-512 instructions to the instruction decoder. AVX-512 instructions are documented in Intel Architecture Instruction Set Extensions Programming Reference (February 2016). AVX-512 instructions are identified by a EVEX prefix which, for the purpose of instruction decoding, can be treated as though it were a 4-byte VEX prefix. Existing instructions which can now accept an EVEX prefix need not be further annotated in the op code map (x86-opcode-map.txt). In the case of new instructions, the op code map is updated accordingly. Also add associated Mask Instructions that are used to manipulate mask registers used in AVX-512 instructions. The 'perf tools' instruction decoder is updated in a subsequent patch. And a representative set of instructions is added to the perf tools new instructions test in a subsequent patch. Signed-off-by: Adrian Hunter Acked-by: Ingo Molnar Acked-by: Masami Hiramatsu Cc: Andy Lutomirski Cc: Dan Williams Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Thomas Gleixner Cc: X86 ML Link: http://lkml.kernel.org/r/1469003437-32706-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/include/asm/inat.h | 17 ++- arch/x86/include/asm/insn.h | 12 +- arch/x86/lib/insn.c | 18 ++- arch/x86/lib/x86-opcode-map.txt | 263 +++++++++++++++++++++++------------ arch/x86/tools/gen-insn-attr-x86.awk | 11 +- 5 files changed, 220 insertions(+), 101 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 74a2e312e8a2..02aff0867211 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -48,6 +48,7 @@ /* AVX VEX prefixes */ #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ +#define INAT_PFX_EVEX 15 /* EVEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -89,6 +90,7 @@ #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) +#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -141,7 +143,13 @@ static inline int inat_last_prefix_id(insn_attr_t attr) static inline int inat_is_vex_prefix(insn_attr_t attr) { attr &= INAT_PFX_MASK; - return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || + attr == INAT_PFX_EVEX; +} + +static inline int inat_is_evex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; } static inline int inat_is_vex3_prefix(insn_attr_t attr) @@ -216,6 +224,11 @@ static inline int inat_accept_vex(insn_attr_t attr) static inline int inat_must_vex(insn_attr_t attr) { - return attr & INAT_VEXONLY; + return attr & (INAT_VEXONLY | INAT_EVEXONLY); +} + +static inline int inat_must_evex(insn_attr_t attr) +{ + return attr & INAT_EVEXONLY; } #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index e7814b74caf8..b3e32b010ab1 100644 --- a/arch/x86/include/asm/insn.h +++ 
b/arch/x86/include/asm/insn.h @@ -91,6 +91,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ +#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ @@ -133,6 +134,13 @@ static inline int insn_is_avx(struct insn *insn) return (insn->vex_prefix.value != 0); } +static inline int insn_is_evex(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.nbytes == 4); +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { @@ -144,8 +152,10 @@ static inline insn_byte_t insn_vex_m_bits(struct insn *insn) { if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ return X86_VEX2_M; - else + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ return X86_VEX3_M(insn->vex_prefix.bytes[1]); + else /* EVEX */ + return X86_EVEX_M(insn->vex_prefix.bytes[1]); } static inline insn_byte_t insn_vex_p_bits(struct insn *insn) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 1a416935bac9..1088eb8f3a5f 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -155,14 +155,24 @@ found: /* * In 32-bits mode, if the [7:6] bits (mod bits of * ModRM) on the second byte are not 11b, it is - * LDS or LES. + * LDS or LES or BOUND. */ if (X86_MODRM_MOD(b2) != 3) goto vex_end; } insn->vex_prefix.bytes[0] = b; insn->vex_prefix.bytes[1] = b2; - if (inat_is_vex3_prefix(attr)) { + if (inat_is_evex_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + b2 = peek_nbyte_next(insn_byte_t, insn, 3); + insn->vex_prefix.bytes[3] = b2; + insn->vex_prefix.nbytes = 4; + insn->next_byte += 4; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); insn->vex_prefix.bytes[2] = b2; insn->vex_prefix.nbytes = 3; @@ -221,7 +231,9 @@ void insn_get_opcode(struct insn *insn) m = insn_vex_m_bits(insn); p = insn_vex_p_bits(insn); insn->attr = inat_get_avx_attribute(op, m, p); - if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || + (!inat_accept_vex(insn->attr) && + !inat_is_group(insn->attr))) insn->attr = 0; /* This instruction is bad */ goto end; /* VEX has only 1 byte for opcode */ } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 28082de46f0d..ec378cd7b71e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -13,12 +13,17 @@ # opcode: escape # escaped-name # EndTable # +# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix +# mnemonics that begin with lowercase 'k' accept a VEX prefix +# # # GrpTable: GrpXXX # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # # AVX Superscripts +# (ev): this opcode requires EVEX prefix. +# (evo): this opcode is changed by EVEX prefix (EVEX opcode) # (v): this opcode requires VEX prefix. # (v1): this opcode only supports 128bit VEX. 
# @@ -137,7 +142,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) +62: BOUND Gv,Ma (i64) | EVEX (Prefix) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -399,17 +404,17 @@ AVXcode: 1 3f: # 0x0f 0x40-0x4f 40: CMOVO Gv,Ev -41: CMOVNO Gv,Ev -42: CMOVB/C/NAE Gv,Ev +41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) +42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) 43: CMOVAE/NB/NC Gv,Ev -44: CMOVE/Z Gv,Ev -45: CMOVNE/NZ Gv,Ev -46: CMOVBE/NA Gv,Ev -47: CMOVA/NBE Gv,Ev +44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) +45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) +46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) +47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) 48: CMOVS Gv,Ev 49: CMOVNS Gv,Ev -4a: CMOVP/PE Gv,Ev -4b: CMOVNP/PO Gv,Ev +4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) +4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk 4c: CMOVL/NGE Gv,Ev 4d: CMOVNL/GE Gv,Ev 4e: CMOVLE/NG Gv,Ev @@ -426,7 +431,7 @@ AVXcode: 1 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) -5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) +5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) @@ -447,7 +452,7 @@ AVXcode: 1 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) -6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) +6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) # 0x0f 0x70-0x7f 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 71: Grp12 (1A) @@ -458,14 +463,14 @@ AVXcode: 1 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
77: emms | vzeroupper | vzeroall -78: VMREAD Ey,Gy -79: VMWRITE Gy,Ey -7a: -7b: +78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) +79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) +7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) +7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) -7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) +7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 80: JO Jz (f64) @@ -485,16 +490,16 @@ AVXcode: 1 8e: JLE/JNG Jz (f64) 8f: JNLE/JG Jz (f64) # 0x0f 0x90-0x9f -90: SETO Eb -91: SETNO Eb -92: SETB/C/NAE Eb -93: SETAE/NB/NC Eb +90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) +91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) +92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) +93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) 94: SETE/Z Eb 95: SETNE/NZ Eb 96: SETBE/NA Eb 97: SETA/NBE Eb -98: SETS Eb -99: SETNS Eb +98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) +99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) 9a: SETP/PE Eb 9b: SETNP/PO Eb 9c: SETL/NGE Eb @@ -564,11 +569,11 @@ d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) -db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) +db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) -df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) +df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) # 0x0f 0xe0-0xef e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) @@ -576,16 +581,16 @@ e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) -e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) +e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) -eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) +eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) -ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) +ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) # 0x0f 0xf0-0xff f0: vlddqu Vx,Mx (F2) f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) @@ -626,81 +631,105 @@ AVXcode: 2 0e: vtestps Vx,Wx (66),(v) 0f: vtestpd Vx,Wx (66),(v) # 0x0f 0x38 
0x10-0x1f -10: pblendvb Vdq,Wdq (66) -11: -12: -13: vcvtph2ps Vx,Wx (66),(v) -14: blendvps Vdq,Wdq (66) -15: blendvpd Vdq,Wdq (66) -16: vpermps Vqq,Hqq,Wqq (66),(v) +10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) +11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) +12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) +13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) +14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) +15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) +16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) 17: vptest Vx,Wx (66) 18: vbroadcastss Vx,Wd (66),(v) -19: vbroadcastsd Vqq,Wq (66),(v) -1a: vbroadcastf128 Vqq,Mdq (66),(v) -1b: +19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) +1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) +1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) -1f: +1f: vpabsq Vx,Wx (66),(ev) # 0x0f 0x38 0x20-0x2f -20: vpmovsxbw Vx,Ux/Mq (66),(v1) -21: vpmovsxbd Vx,Ux/Md (66),(v1) -22: vpmovsxbq Vx,Ux/Mw (66),(v1) -23: vpmovsxwd Vx,Ux/Mq (66),(v1) -24: vpmovsxwq Vx,Ux/Md (66),(v1) -25: vpmovsxdq Vx,Ux/Mq (66),(v1) -26: -27: -28: vpmuldq Vx,Hx,Wx (66),(v1) -29: vpcmpeqq Vx,Hx,Wx (66),(v1) -2a: vmovntdqa Vx,Mx (66),(v1) +20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) +21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) +22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) +23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) +24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) +25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) +26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) +27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) +28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) +29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev) +2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) 2b: vpackusdw Vx,Hx,Wx (66),(v1) -2c: vmaskmovps Vx,Hx,Mx (66),(v) -2d: vmaskmovpd Vx,Hx,Mx (66),(v) +2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) +2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) 2e: vmaskmovps Mx,Hx,Vx (66),(v) 2f: vmaskmovpd Mx,Hx,Vx (66),(v) # 0x0f 0x38 0x30-0x3f -30: vpmovzxbw Vx,Ux/Mq (66),(v1) -31: vpmovzxbd Vx,Ux/Md (66),(v1) -32: vpmovzxbq Vx,Ux/Mw (66),(v1) -33: vpmovzxwd Vx,Ux/Mq (66),(v1) -34: vpmovzxwq Vx,Ux/Md (66),(v1) -35: vpmovzxdq Vx,Ux/Mq (66),(v1) -36: vpermd Vqq,Hqq,Wqq (66),(v) +30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) +31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) +32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) +33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) +34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) +35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) +36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) 37: vpcmpgtq Vx,Hx,Wx (66),(v1) -38: vpminsb Vx,Hx,Wx (66),(v1) -39: vpminsd Vx,Hx,Wx (66),(v1) -3a: vpminuw Vx,Hx,Wx (66),(v1) -3b: vpminud Vx,Hx,Wx (66),(v1) +38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) +39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) +3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) +3b: 
vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) 3c: vpmaxsb Vx,Hx,Wx (66),(v1) -3d: vpmaxsd Vx,Hx,Wx (66),(v1) +3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) 3e: vpmaxuw Vx,Hx,Wx (66),(v1) -3f: vpmaxud Vx,Hx,Wx (66),(v1) +3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) # 0x0f 0x38 0x40-0x8f -40: vpmulld Vx,Hx,Wx (66),(v1) +40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) 41: vphminposuw Vdq,Wdq (66),(v1) -42: -43: -44: +42: vgetexpps/d Vx,Wx (66),(ev) +43: vgetexpss/d Vx,Hx,Wx (66),(ev) +44: vplzcntd/q Vx,Wx (66),(ev) 45: vpsrlvd/q Vx,Hx,Wx (66),(v) -46: vpsravd Vx,Hx,Wx (66),(v) +46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x57 +# Skip 0x48-0x4b +4c: vrcp14ps/d Vpd,Wpd (66),(ev) +4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) +4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) +4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) +# Skip 0x50-0x57 58: vpbroadcastd Vx,Wx (66),(v) -59: vpbroadcastq Vx,Wx (66),(v) -5a: vbroadcasti128 Vqq,Mdq (66),(v) -# Skip 0x5b-0x77 +59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) +5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) +5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) +# Skip 0x5c-0x63 +64: vpblendmd/q Vx,Hx,Wx (66),(ev) +65: vblendmps/d Vx,Hx,Wx (66),(ev) +66: vpblendmb/w Vx,Hx,Wx (66),(ev) +# Skip 0x67-0x74 +75: vpermi2b/w Vx,Hx,Wx (66),(ev) +76: vpermi2d/q Vx,Hx,Wx (66),(ev) +77: vpermi2ps/d Vx,Hx,Wx (66),(ev) 78: vpbroadcastb Vx,Wx (66),(v) 79: vpbroadcastw Vx,Wx (66),(v) -# Skip 0x7a-0x7f +7a: vpbroadcastb Vx,Rv (66),(ev) +7b: vpbroadcastw Vx,Rv (66),(ev) +7c: vpbroadcastd/q Vx,Rv (66),(ev) +7d: vpermt2b/w Vx,Hx,Wx (66),(ev) +7e: vpermt2d/q Vx,Hx,Wx (66),(ev) +7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) 81: INVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) +83: vpmultishiftqb Vx,Hx,Wx (66),(ev) +88: vexpandps/d Vpd,Wpd (66),(ev) +89: vpexpandd/q Vx,Wx (66),(ev) +8a: vcompressps/d Wx,Vx (66),(ev) +8b: vpcompressd/q Wx,Vx (66),(ev) 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) +8d: vpermb/w Vx,Hx,Wx (66),(ev) 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) # 0x0f 0x38 0x90-0xbf (FMA) -90: vgatherdd/q Vx,Hx,Wx (66),(v) -91: vgatherqd/q Vx,Hx,Wx (66),(v) +90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) +91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) 92: vgatherdps/d Vx,Hx,Wx (66),(v) 93: vgatherqps/d Vx,Hx,Wx (66),(v) 94: @@ -715,6 +744,10 @@ AVXcode: 2 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) +a0: vpscatterdd/q Wx,Vx (66),(ev) +a1: vpscatterqd/q Wx,Vx (66),(ev) +a2: vscatterdps/d Wx,Vx (66),(ev) +a3: vscatterqps/d Wx,Vx (66),(ev) a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) @@ -725,6 +758,8 @@ ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) +b4: vpmadd52luq Vx,Hx,Wx (66),(ev) +b5: vpmadd52huq Vx,Hx,Wx (66),(ev) b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) @@ -736,12 +771,15 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff -c8: sha1nexte Vdq,Wdq +c4: vpconflictd/q Vx,Wx (66),(ev) +c6: Grp18 (1A) +c7: Grp19 (1A) +c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) c9: sha1msg1 Vdq,Wdq -ca: 
sha1msg2 Vdq,Wdq -cb: sha256rnds2 Vdq,Wdq -cc: sha256msg1 Vdq,Wdq -cd: sha256msg2 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) +cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) +cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) +cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -763,15 +801,15 @@ AVXcode: 3 00: vpermq Vqq,Wqq,Ib (66),(v) 01: vpermpd Vqq,Wqq,Ib (66),(v) 02: vpblendd Vx,Hx,Wx,Ib (66),(v) -03: +03: valignd/q Vx,Hx,Wx,Ib (66),(ev) 04: vpermilps Vx,Wx,Ib (66),(v) 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) -09: vroundpd Vx,Wx,Ib (66) -0a: vroundss Vss,Wss,Ib (66),(v1) -0b: vroundsd Vsd,Wsd,Ib (66),(v1) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) @@ -780,26 +818,51 @@ AVXcode: 3 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 17: vextractps Ed,Vdq,Ib (66),(v1) -18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) -19: vextractf128 Wdq,Vqq,Ib (66),(v) +18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) +1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) 1d: vcvtps2ph Wx,Vx,Ib (66),(v) +1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) +1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) -38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) -39: vextracti128 Wdq,Vqq,Ib (66),(v) +23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) +25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +30: kshiftrb/w Vk,Uk,Ib (66),(v) +31: kshiftrd/q Vk,Uk,Ib (66),(v) +32: kshiftlb/w Vk,Uk,Ib (66),(v) +33: kshiftld/q Vk,Uk,Ib (66),(v) +38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) +39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) +3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) +3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) +3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) +3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) 40: vdpps Vx,Hx,Wx,Ib (66) 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) -42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) +42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) +43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) +50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) +51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) +54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) +55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) @@ -927,8 +990,10 @@ 
GrpTable: Grp12 EndTable GrpTable: Grp13 +0: vprord/q Hx,Wx,Ib (66),(ev) +1: vprold/q Hx,Wx,Ib (66),(ev) 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) -4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) +4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) EndTable @@ -963,6 +1028,20 @@ GrpTable: Grp17 3: BLSI By,Ey (v) EndTable +GrpTable: Grp18 +1: vgatherpf0dps/d Wx (66),(ev) +2: vgatherpf1dps/d Wx (66),(ev) +5: vscatterpf0dps/d Wx (66),(ev) +6: vscatterpf1dps/d Wx (66),(ev) +EndTable + +GrpTable: Grp19 +1: vgatherpf0qps/d Wx (66),(ev) +2: vgatherpf1qps/d Wx (66),(ev) +5: vscatterpf0qps/d Wx (66),(ev) +6: vscatterpf1qps/d Wx (66),(ev) +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 093a892026f9..a3d2c62fd805 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -72,12 +72,14 @@ BEGIN { lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 - # All opcodes starting with lower-case 'v' or with (v1) superscript + # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript # accepts VEX prefix - vexok_opcode_expr = "^v.*" + vexok_opcode_expr = "^[vk].*" vexok_expr = "\\(v1\\)" # All opcodes with (v) superscript supports *only* VEX prefix vexonly_expr = "\\(v\\)" + # All opcodes with (ev) superscript supports *only* EVEX prefix + evexonly_expr = "\\(ev\\)" prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" @@ -95,6 +97,7 @@ BEGIN { prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" + prefix_num["EVEX"] = "INAT_PFX_EVEX" clear_vars() } @@ -319,7 +322,9 @@ function convert_operands(count,opnd, i,j,imm,mod) flags = add_flags(flags, "INAT_MODRM") # check VEX codes - if (match(ext, vexonly_expr)) + if (match(ext, evexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") + else if (match(ext, vexonly_expr)) flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) flags = add_flags(flags, "INAT_VEXOK") -- cgit v1.2.3 From ec3ed4a2104b8d1ab8da2db5b1221b2ba8a7a6e1 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 20 Jul 2016 12:45:51 -0700 Subject: x86/fpu: Do not BUG_ON() in early FPU code I don't think it is really possible to have a system where CPUID enumerates support for XSAVE but that it does not have FP/SSE (they are "legacy" features and always present). But, I did manage to hit this case in qemu when I enabled its somewhat shaky XSAVE support. The bummer is that the FPU is set up before we parse the command-line or have *any* console support including earlyprintk. That turned what should have been an easy thing to debug in to a bit more of an odyssey. So a BUG() here is worthless. All it does it guarantee that if/when we hit this case we have an empty console. So, remove the BUG() and try to limp along by disabling XSAVE and trying to continue. Add a comment on why we are doing this, and also add a common "out_disable" path for leaving fpu__init_system_xstate(). Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160720194551.63BB2B58@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3169bcaf9391..680049aa4593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -714,8 +714,13 @@ void __init fpu__init_system_xstate(void) xfeatures_mask = eax + ((u64)edx << 32); if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + /* + * This indicates that something really unexpected happened + * with the enumeration. Disable XSAVE and try to continue + * booting without it. This is too early to BUG(). + */ pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); - BUG(); + goto out_disable; } xfeatures_mask &= fpu__get_supported_xfeatures_mask(); @@ -723,11 +728,8 @@ void __init fpu__init_system_xstate(void) /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); err = init_xstate_size(); - if (err) { - /* something went wrong, boot without any XSAVE support */ - fpu__init_disable_system_xstate(); - return; - } + if (err) + goto out_disable; /* * Update info used for ptrace frames; use standard-format size and no @@ -744,6 +746,11 @@ void __init fpu__init_system_xstate(void) xfeatures_mask, fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); + return; + +out_disable: + /* something went wrong, try to boot without any XSAVE support */ + fpu__init_disable_system_xstate(); } /* -- cgit v1.2.3 From 30f027398b329c75c8f23a3c13be240b50866fdc Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 21 Jul 2016 14:16:51 -0700 Subject: x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does It doesn't just control probing for the EBDA -- it controls whether we detect and reserve the <1MB BIOS regions in general. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mario Limonciello Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/55bd591115498440d461857a7b64f349a5d911f3.1469135598.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c519c052700a..66c15a01667f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -168,8 +168,9 @@ struct x86_legacy_devices { * struct x86_legacy_features - legacy x86 features * * @rtc: this device has a CMOS real-time clock present - * @reserve_bios_regions: it's safe to search for the EBDA signature in the hardware's - * low RAM + * @reserve_bios_regions: boot code will search for the EBDA address and the + * start of the 640k - 1M BIOS region. If false, the platform must + * ensure that its memory map correctly reserves sub-1MB regions as needed. * @devices: legacy x86 devices, refer to struct x86_legacy_devices * documentation for further details. 
*/ -- cgit v1.2.3 From 6a79296cb15d947bcb4558011fe066e5d8252b35 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 21 Jul 2016 14:16:52 -0700 Subject: x86/boot: Simplify EBDA-vs-BIOS reservation logic Both the intent and the effect of reserve_bios_regions() is simple: reserve the range from the apparent BIOS start (suitably filtered) through 1MB and, if the EBDA start address is sensible, extend that reservation downward to cover the EBDA as well. The code is overcomplicated, though, and contains head-scratchers like: if (ebda_start < BIOS_START_MIN) ebda_start = BIOS_START_MAX; That snipped is trying to say "if ebda_start < BIOS_START_MIN, ignore it". Simplify it: reorder the code so that it makes sense. This should have no functional effect under any circumstances. Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mario Limonciello Cc: Matthew Garrett Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/ef89c0c761be20ead8bd9a3275743e6259b6092a.1469135598.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/ebda.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index 6219eef20e2e..4312f8ae71b7 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -65,22 +65,6 @@ void __init reserve_bios_regions(void) if (!x86_platform.legacy.reserve_bios_regions) return; - /* Get the start address of the EBDA page: */ - ebda_start = get_bios_ebda(); - - /* - * Quirk: some old Dells seem to have a 4k EBDA without - * reporting so in their BIOS RAM size value, so just - * consider the memory above 640K to be off limits - * (bugzilla 2990). - * - * We detect this case by filtering for nonsensical EBDA - * addresses below 128K, where we can assume that they - * are bogus and bump it up to a fixed 640K value: - */ - if (ebda_start < BIOS_START_MIN) - ebda_start = BIOS_START_MAX; - /* * BIOS RAM size is encoded in kilobytes, convert it * to bytes to get a first guess at where the BIOS @@ -91,18 +75,22 @@ void __init reserve_bios_regions(void) /* * If bios_start is less than 128K, assume it is bogus - * and bump it up to 640K: + * and bump it up to 640K. Similarly, if bios_start is above 640K, + * don't trust it. */ - if (bios_start < BIOS_START_MIN) + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) bios_start = BIOS_START_MAX; + /* Get the start address of the EBDA page: */ + ebda_start = get_bios_ebda(); + /* - * Use the lower of the bios_start and ebda_start - * as the starting point, but don't allow it to - * go beyond 640K: + * If the EBDA start address is sane and is below the BIOS region, + * then also reserve everything from the EBDA start address up to + * the BIOS region. 
*/ - bios_start = min(bios_start, ebda_start); - bios_start = min(bios_start, BIOS_START_MAX); + if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; /* Reserve all memory between bios_start and the 1MB mark: */ memblock_reserve(bios_start, 0x100000 - bios_start); -- cgit v1.2.3 From 530dd8d4b9daf77e3e5d145a26210d91ced954c7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 22 Jul 2016 21:58:08 -0700 Subject: x86/mm/cpa: Fix populate_pgd(): Stop trying to deallocate failed PUDs Valdis Kletnieks bisected a boot failure back to this recent commit: 360cb4d15567 ("x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated") I broke the case where a PUD table got allocated -- populate_pud() would wander off a pgd_none entry and get lost. I'm not sure how this survived my testing. Fix the original issue in a much simpler way. The problem was that, if we allocated a PUD table, failed to populate it, and freed it, another CPU could potentially keep using the PGD entry we installed (either by copying it via vmalloc_fault or by speculatively caching it). There's a straightforward fix: simply leave the top-level entry in place if this happens. This can't waste any significant amount of memory -- there are at most 256 entries like this systemwide and, as a practical matter, if we hit this failure path repeatedly, we're likely to reuse the same page anyway. For context, this is a reversion with this hunk added in: if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ - unmap_pgd_range(cpa->pgd, addr, + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } This effectively open-codes what the now-deleted unmap_pgd_range() function used to do except that unmap_pgd_range() used to try to free the page as well. Reported-by: Valdis Kletnieks Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Mike Krinkin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/21cbc2822aa18aa812c0215f4231dbf5f65afa7f.1469249789.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 26c93c6e04a0..2bc6ea153f76 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1082,6 +1082,8 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); if (!pud) return -1; + + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1089,16 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - if (pud) - free_page((unsigned long)pud); unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } - if (pud) - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); - cpa->numpages = ret; return 0; } -- cgit v1.2.3 From 55920d31f1e3fea06702c74271dd56c4fc9b70ca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 23 Jul 2016 09:59:28 -0700 Subject: x86/mm/cpa: Add missing comment in populate_pdg() In commit: 21cbc2822aa1 ("x86/mm/cpa: Unbreak populate_pgd(): stop trying to deallocate failed PUDs") I intended to add this comment, but I failed at using git. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/242baf8612394f4e31216f96d13c4d2e9b90d1b7.1469293159.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2bc6ea153f76..47870a534877 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1091,6 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; -- cgit v1.2.3 From d51306f1a3bc0e3a7b86d8f2b2dedf34b356d3dd Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sat, 23 Jul 2016 14:35:40 +1000 Subject: x86: Make the vdso2c compiler use the host architecture headers To be clear: this is a ppc64le hosted, x86_64 target cross build. Signed-off-by: Stephen Rothwell Acked-by: Andy Lutomirski Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160723150845.3af8e452@canb.auug.org.au Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/entry/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72eaade6..25e88c030c47 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -55,7 +55,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) -HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi +HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -- cgit v1.2.3
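
For readers who want to experiment with the reserve_bios_regions() logic outside the kernel, below is a minimal userspace sketch of the decision flow as it stands after the two EBDA patches above. The BIOS_START_MIN/BIOS_START_MAX constants and the two sanity checks mirror the final arch/x86/kernel/ebda.c; the bios_ram_size_kb_stub() and get_bios_ebda_stub() helpers and the closing printf are illustrative stand-ins (assumptions for this sketch, not kernel interfaces) for reading the 0x413 BIOS RAM-size word, get_bios_ebda() and memblock_reserve().

/*
 * Userspace sketch of the reserve_bios_regions() decision logic after
 * "x86/boot: Simplify EBDA-vs-BIOS reservation logic". The stubbed BIOS
 * values and the printf standing in for memblock_reserve() are
 * illustrative assumptions, not kernel code.
 */
#include <stdio.h>

#define BIOS_START_MIN	0x20000U	/* 128K, below this is considered bogus */
#define BIOS_START_MAX	0x9f000U	/* 640K, absolute maximum */

/* Stand-ins for the 0x413 BIOS RAM-size word (in KB) and get_bios_ebda(): */
static unsigned int bios_ram_size_kb_stub(void)	{ return 639; }
static unsigned int get_bios_ebda_stub(void)	{ return 0x9d800; }

int main(void)
{
	unsigned int bios_start, ebda_start;

	/* BIOS RAM size is encoded in kilobytes; convert to a byte address: */
	bios_start = bios_ram_size_kb_stub() << 10;

	/* Distrust values outside the sane 128K..640K window: */
	if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
		bios_start = BIOS_START_MAX;

	/* A sane EBDA start below the BIOS area extends the reservation down: */
	ebda_start = get_bios_ebda_stub();
	if (ebda_start >= BIOS_START_MIN && ebda_start < bios_start)
		bios_start = ebda_start;

	/* The kernel would now memblock_reserve(bios_start, 1MB - bios_start): */
	printf("reserve [0x%x, 0x100000)\n", bios_start);
	return 0;
}

With the stub values above this prints reserve [0x9d800, 0x100000): the RAM-size word is sanity-checked once, and a sane EBDA address can only lower the start of the reserved window, never raise it, which is the point of the reordering in the last EBDA patch.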